// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN: -fallow-half-arguments-and-returns -ffp-contract=fast -S -emit-llvm -o - %s \
// RUN: | opt -S -mem2reg \
// RUN: | FileCheck %s

// Test new aarch64 intrinsics and types

#include <arm_neon.h>

// CHECK-LABEL: define <8 x i8> @test_vadd_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vadd_s8(int8x8_t v1, int8x8_t v2) {
  return vadd_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vadd_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, %v2
// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vadd_s16(int16x4_t v1, int16x4_t v2) {
  return vadd_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vadd_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, %v2
// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vadd_s32(int32x2_t v1, int32x2_t v2) {
  return vadd_s32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vadd_s64(<1 x i64> %v1, <1 x i64> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <1 x i64> %v1, %v2
// CHECK: ret <1 x i64> [[ADD_I]]
int64x1_t test_vadd_s64(int64x1_t v1, int64x1_t v2) {
  return vadd_s64(v1, v2);
}

// CHECK-LABEL: define <2 x float> @test_vadd_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %v1, %v2
// CHECK: ret <2 x float> [[ADD_I]]
float32x2_t test_vadd_f32(float32x2_t v1, float32x2_t v2) {
  return vadd_f32(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vadd_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
// CHECK: ret <8 x i8> [[ADD_I]]
uint8x8_t test_vadd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vadd_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vadd_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, %v2
// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vadd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vadd_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vadd_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, %v2
// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vadd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vadd_u32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vadd_u64(<1 x i64> %v1, <1 x i64> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <1 x i64> %v1, %v2
// CHECK: ret <1 x i64> [[ADD_I]]
uint64x1_t test_vadd_u64(uint64x1_t v1, uint64x1_t v2) {
  return vadd_u64(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vaddq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, %v2
// CHECK: ret <16 x i8> [[ADD_I]]
int8x16_t test_vaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vaddq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vaddq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, %v2
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vaddq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vaddq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, %v2
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddq_s32(int32x4_t v1, int32x4_t v2) {
  return vaddq_s32(v1, v2);
}

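// The vadd/vaddq intrinsics above all lower to a single lane-wise (f)add with
// no cross-lane interaction. A minimal usage sketch (illustrative only; the
// helper name add3_s32 is hypothetical, and an unused static helper is not
// emitted as IR, so it does not disturb the FileCheck lines):
static inline int32x4_t add3_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
  // Two independent lane-wise adds; each i32 lane is summed on its own.
  return vaddq_s32(vaddq_s32(a, b), c);
}
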
// CHECK-LABEL: define <2 x i64> @test_vaddq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %v1, %v2
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddq_s64(int64x2_t v1, int64x2_t v2) {
  return vaddq_s64(v1, v2);
}

// CHECK-LABEL: define <4 x float> @test_vaddq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %v1, %v2
// CHECK: ret <4 x float> [[ADD_I]]
float32x4_t test_vaddq_f32(float32x4_t v1, float32x4_t v2) {
  return vaddq_f32(v1, v2);
}

// CHECK-LABEL: define <2 x double> @test_vaddq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = fadd <2 x double> %v1, %v2
// CHECK: ret <2 x double> [[ADD_I]]
float64x2_t test_vaddq_f64(float64x2_t v1, float64x2_t v2) {
  return vaddq_f64(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vaddq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, %v2
// CHECK: ret <16 x i8> [[ADD_I]]
uint8x16_t test_vaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vaddq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vaddq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, %v2
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vaddq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vaddq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, %v2
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vaddq_u32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vaddq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %v1, %v2
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vaddq_u64(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vsub_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2
// CHECK: ret <8 x i8> [[SUB_I]]
int8x8_t test_vsub_s8(int8x8_t v1, int8x8_t v2) {
  return vsub_s8(v1, v2);
}
// CHECK-LABEL: define <4 x i16> @test_vsub_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2
// CHECK: ret <4 x i16> [[SUB_I]]
int16x4_t test_vsub_s16(int16x4_t v1, int16x4_t v2) {
  return vsub_s16(v1, v2);
}
// CHECK-LABEL: define <2 x i32> @test_vsub_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
// CHECK: ret <2 x i32> [[SUB_I]]
int32x2_t test_vsub_s32(int32x2_t v1, int32x2_t v2) {
  return vsub_s32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vsub_s64(<1 x i64> %v1, <1 x i64> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
// CHECK: ret <1 x i64> [[SUB_I]]
int64x1_t test_vsub_s64(int64x1_t v1, int64x1_t v2) {
  return vsub_s64(v1, v2);
}

// CHECK-LABEL: define <2 x float> @test_vsub_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %v1, %v2
// CHECK: ret <2 x float> [[SUB_I]]
float32x2_t test_vsub_f32(float32x2_t v1, float32x2_t v2) {
  return vsub_f32(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vsub_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2
// CHECK: ret <8 x i8> [[SUB_I]]
uint8x8_t test_vsub_u8(uint8x8_t v1, uint8x8_t v2) {
  return vsub_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vsub_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2
// CHECK: ret <4 x i16> [[SUB_I]]
uint16x4_t test_vsub_u16(uint16x4_t v1, uint16x4_t v2) {
  return vsub_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vsub_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
// CHECK: ret <2 x i32> [[SUB_I]]
uint32x2_t test_vsub_u32(uint32x2_t v1, uint32x2_t v2) {
  return vsub_u32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vsub_u64(<1 x i64> %v1, <1 x i64> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
// CHECK: ret <1 x i64> [[SUB_I]]
uint64x1_t test_vsub_u64(uint64x1_t v1, uint64x1_t v2) {
  return vsub_u64(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vsubq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
// CHECK: ret <16 x i8> [[SUB_I]]
int8x16_t test_vsubq_s8(int8x16_t v1, int8x16_t v2) {
  return vsubq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vsubq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubq_s16(int16x8_t v1, int16x8_t v2) {
  return vsubq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vsubq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubq_s32(int32x4_t v1, int32x4_t v2) {
  return vsubq_s32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vsubq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubq_s64(int64x2_t v1, int64x2_t v2) {
  return vsubq_s64(v1, v2);
}

// CHECK-LABEL: define <4 x float> @test_vsubq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %v1, %v2
// CHECK: ret <4 x float> [[SUB_I]]
float32x4_t test_vsubq_f32(float32x4_t v1, float32x4_t v2) {
  return vsubq_f32(v1, v2);
}

// CHECK-LABEL: define <2 x double> @test_vsubq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = fsub <2 x double> %v1, %v2
// CHECK: ret <2 x double> [[SUB_I]]
float64x2_t test_vsubq_f64(float64x2_t v1, float64x2_t v2) {
  return vsubq_f64(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vsubq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
// CHECK: ret <16 x i8> [[SUB_I]]
uint8x16_t test_vsubq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vsubq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vsubq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vsubq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vsubq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vsubq_u32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vsubq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vsubq_u64(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vmul_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
// CHECK: ret <8 x i8> [[MUL_I]]
int8x8_t test_vmul_s8(int8x8_t v1, int8x8_t v2) {
  return vmul_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vmul_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
// CHECK: ret <4 x i16> [[MUL_I]]
int16x4_t test_vmul_s16(int16x4_t v1, int16x4_t v2) {
  return vmul_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vmul_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
// CHECK: ret <2 x i32> [[MUL_I]]
int32x2_t test_vmul_s32(int32x2_t v1, int32x2_t v2) {
  return vmul_s32(v1, v2);
}

// CHECK-LABEL: define <2 x float> @test_vmul_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v1, %v2
// CHECK: ret <2 x float> [[MUL_I]]
float32x2_t test_vmul_f32(float32x2_t v1, float32x2_t v2) {
  return vmul_f32(v1, v2);
}


// CHECK-LABEL: define <8 x i8> @test_vmul_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
// CHECK: ret <8 x i8> [[MUL_I]]
uint8x8_t test_vmul_u8(uint8x8_t v1, uint8x8_t v2) {
  return vmul_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vmul_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
// CHECK: ret <4 x i16> [[MUL_I]]
uint16x4_t test_vmul_u16(uint16x4_t v1, uint16x4_t v2) {
  return vmul_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vmul_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
// CHECK: ret <2 x i32> [[MUL_I]]
uint32x2_t test_vmul_u32(uint32x2_t v1, uint32x2_t v2) {
  return vmul_u32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vmulq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
// CHECK: ret <16 x i8> [[MUL_I]]
int8x16_t test_vmulq_s8(int8x16_t v1, int8x16_t v2) {
  return vmulq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vmulq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
// CHECK: ret <8 x i16> [[MUL_I]]
int16x8_t test_vmulq_s16(int16x8_t v1, int16x8_t v2) {
  return vmulq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vmulq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
// CHECK: ret <4 x i32> [[MUL_I]]
int32x4_t test_vmulq_s32(int32x4_t v1, int32x4_t v2) {
  return vmulq_s32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vmulq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
// CHECK: ret <16 x i8> [[MUL_I]]
uint8x16_t test_vmulq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vmulq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vmulq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
// CHECK: ret <8 x i16> [[MUL_I]]
uint16x8_t test_vmulq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vmulq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vmulq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
// CHECK: ret <4 x i32> [[MUL_I]]
uint32x4_t test_vmulq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vmulq_u32(v1, v2);
}

// CHECK-LABEL: define <4 x float> @test_vmulq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v1, %v2
// CHECK: ret <4 x float> [[MUL_I]]
float32x4_t test_vmulq_f32(float32x4_t v1, float32x4_t v2) {
  return vmulq_f32(v1, v2);
}

// CHECK-LABEL: define <2 x double> @test_vmulq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v1, %v2
// CHECK: ret <2 x double> [[MUL_I]]
float64x2_t test_vmulq_f64(float64x2_t v1, float64x2_t v2) {
  return vmulq_f64(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vmul_p8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK: ret <8 x i8> [[VMUL_V_I]]
poly8x8_t test_vmul_p8(poly8x8_t v1, poly8x8_t v2) {
  // test_vmul_p8
  return vmul_p8(v1, v2);
  // pmul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}

// CHECK-LABEL: define <16 x i8> @test_vmulq_p8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK: ret <16 x i8> [[VMULQ_V_I]]
poly8x16_t test_vmulq_p8(poly8x16_t v1, poly8x16_t v2) {
  // test_vmulq_p8
  return vmulq_p8(v1, v2);
  // pmul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}

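// vmul_p8/vmulq_p8 are the only multiplies here that cannot be expressed as a
// plain IR mul: polynomial (carry-less) multiplication maps to the
// @llvm.aarch64.neon.pmul intrinsics checked above. A rough scalar model of
// one poly8 lane (illustrative sketch only; poly8_lane_mul is a hypothetical
// helper, not part of this test):
static inline unsigned char poly8_lane_mul(unsigned char a, unsigned char b) {
  unsigned char r = 0;
  for (int i = 0; i < 8; ++i)         // shift-and-XOR instead of shift-and-add:
    if (b & (1u << i))                // addition in GF(2) is XOR, so partial
      r ^= (unsigned char)(a << i);   // products never carry between bits
  return r;
}
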
// CHECK-LABEL: define <8 x i8> @test_vmla_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vmla_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vmla_s8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i8> @test_vmla_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[ADD_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vmla_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return vmla_s16(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i32> @test_vmla_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vmla_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vmla_s32(v1, v2, v3);
}

// CHECK-LABEL: define <2 x float> @test_vmla_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %v1, [[MUL_I]]
// CHECK: ret <2 x float> [[ADD_I]]
float32x2_t test_vmla_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vmla_f32(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i8> @test_vmla_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
uint8x8_t test_vmla_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vmla_u8(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i16> @test_vmla_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vmla_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vmla_u16(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i32> @test_vmla_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vmla_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vmla_u32(v1, v2, v3);
}

// CHECK-LABEL: define <16 x i8> @test_vmlaq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
int8x16_t test_vmlaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vmlaq_s8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vmlaq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vmlaq_s16(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i32> @test_vmlaq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vmlaq_s32(v1, v2, v3);
}

// CHECK-LABEL: define <4 x float> @test_vmlaq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %v1, [[MUL_I]]
// CHECK: ret <4 x float> [[ADD_I]]
float32x4_t test_vmlaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vmlaq_f32(v1, v2, v3);
}

// CHECK-LABEL: define <16 x i8> @test_vmlaq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
uint8x16_t test_vmlaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vmlaq_u8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vmlaq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vmlaq_u16(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i32> @test_vmlaq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vmlaq_u32(v1, v2, v3);
}

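// As the checks above show, vmla/vmls are not fused: Clang emits a separate
// (f)mul followed by an (f)add or (f)sub, and the FileCheck patterns match
// those two instructions rather than a single intrinsic call. A minimal
// sketch of the equivalent expression form (illustrative only; the helper
// name mla_by_hand_f32 is hypothetical):
static inline float32x4_t mla_by_hand_f32(float32x4_t acc, float32x4_t a, float32x4_t b) {
  // Same IR shape as vmlaq_f32(acc, a, b): an fmul feeding an fadd.
  return vaddq_f32(acc, vmulq_f32(a, b));
}
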
// CHECK-LABEL: define <2 x double> @test_vmlaq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
// CHECK: [[ADD_I:%.*]] = fadd <2 x double> %v1, [[MUL_I]]
// CHECK: ret <2 x double> [[ADD_I]]
float64x2_t test_vmlaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vmlaq_f64(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i8> @test_vmls_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
// CHECK: ret <8 x i8> [[SUB_I]]
int8x8_t test_vmls_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vmls_s8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i8> @test_vmls_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SUB_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vmls_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return vmls_s16(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i32> @test_vmls_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
// CHECK: ret <2 x i32> [[SUB_I]]
int32x2_t test_vmls_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vmls_s32(v1, v2, v3);
}

// CHECK-LABEL: define <2 x float> @test_vmls_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %v1, [[MUL_I]]
// CHECK: ret <2 x float> [[SUB_I]]
float32x2_t test_vmls_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vmls_f32(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i8> @test_vmls_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
// CHECK: ret <8 x i8> [[SUB_I]]
uint8x8_t test_vmls_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vmls_u8(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i16> @test_vmls_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
// CHECK: ret <4 x i16> [[SUB_I]]
uint16x4_t test_vmls_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vmls_u16(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i32> @test_vmls_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
// CHECK: ret <2 x i32> [[SUB_I]]
uint32x2_t test_vmls_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vmls_u32(v1, v2, v3);
}
// CHECK-LABEL: define <16 x i8> @test_vmlsq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
// CHECK: ret <16 x i8> [[SUB_I]]
int8x16_t test_vmlsq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vmlsq_s8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vmlsq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vmlsq_s16(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vmlsq_s32(v1, v2, v3);
}

// CHECK-LABEL: define <4 x float> @test_vmlsq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %v1, [[MUL_I]]
// CHECK: ret <4 x float> [[SUB_I]]
float32x4_t test_vmlsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vmlsq_f32(v1, v2, v3);
}
// CHECK-LABEL: define <16 x i8> @test_vmlsq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
// CHECK: ret <16 x i8> [[SUB_I]]
uint8x16_t test_vmlsq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vmlsq_u8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vmlsq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vmlsq_u16(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vmlsq_u32(v1, v2, v3);
}

// CHECK-LABEL: define <2 x double> @test_vmlsq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
// CHECK: [[SUB_I:%.*]] = fsub <2 x double> %v1, [[MUL_I]]
// CHECK: ret <2 x double> [[SUB_I]]
float64x2_t test_vmlsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vmlsq_f64(v1, v2, v3);
}
// CHECK-LABEL: define <2 x float> @test_vfma_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #4
// CHECK: ret <2 x float> [[TMP6]]
float32x2_t test_vfma_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vfma_f32(v1, v2, v3);
}

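// In contrast to vmla, vfma/vfms lower to the @llvm.fma intrinsic, i.e. a
// genuinely fused multiply-add with a single rounding step; vfms is encoded
// by negating the first multiplicand (the fsub from -0.0 in the vfms checks
// below). A hedged sketch of the difference (hypothetical helper names):
static inline float32x2_t unfused_mla(float32x2_t acc, float32x2_t a, float32x2_t b) {
  // Rounds twice: once after the fmul, once after the fadd.
  return vadd_f32(acc, vmul_f32(a, b));
}
static inline float32x2_t fused_fma(float32x2_t acc, float32x2_t a, float32x2_t b) {
  // Rounds once: @llvm.fma keeps the intermediate product exact.
  return vfma_f32(acc, a, b);
}
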
// CHECK-LABEL: define <4 x float> @test_vfmaq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #4
// CHECK: ret <4 x float> [[TMP6]]
float32x4_t test_vfmaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vfmaq_f32(v1, v2, v3);
}

// CHECK-LABEL: define <2 x double> @test_vfmaq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #4
// CHECK: ret <2 x double> [[TMP6]]
float64x2_t test_vfmaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vfmaq_f64(v1, v2, v3);
}
// CHECK-LABEL: define <2 x float> @test_vfms_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
// CHECK: [[SUB_I:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v2
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #4
// CHECK: ret <2 x float> [[TMP6]]
float32x2_t test_vfms_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vfms_f32(v1, v2, v3);
}

// CHECK-LABEL: define <4 x float> @test_vfmsq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
// CHECK: [[SUB_I:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v2
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #4
// CHECK: ret <4 x float> [[TMP6]]
float32x4_t test_vfmsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vfmsq_f32(v1, v2, v3);
}

// CHECK-LABEL: define <2 x double> @test_vfmsq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
// CHECK: [[SUB_I:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v2
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #4
// CHECK: ret <2 x double> [[TMP6]]
float64x2_t test_vfmsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vfmsq_f64(v1, v2, v3);
}

// CHECK-LABEL: define <2 x double> @test_vdivq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[DIV_I:%.*]] = fdiv <2 x double> %v1, %v2
// CHECK: ret <2 x double> [[DIV_I]]
float64x2_t test_vdivq_f64(float64x2_t v1, float64x2_t v2) {
  return vdivq_f64(v1, v2);
}

// CHECK-LABEL: define <4 x float> @test_vdivq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK: [[DIV_I:%.*]] = fdiv <4 x float> %v1, %v2
// CHECK: ret <4 x float> [[DIV_I]]
float32x4_t test_vdivq_f32(float32x4_t v1, float32x4_t v2) {
  return vdivq_f32(v1, v2);
}

// CHECK-LABEL: define <2 x float> @test_vdiv_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[DIV_I:%.*]] = fdiv <2 x float> %v1, %v2
// CHECK: ret <2 x float> [[DIV_I]]
float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) {
  return vdiv_f32(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vaba_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v2, <8 x i8> %v3) #4
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vaba_s8(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i16> @test_vaba_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return vaba_s16(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i32> @test_vaba_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vaba_s32(v1, v2, v3);
}

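// vaba* is "absolute difference and accumulate": the sabd/uabd intrinsic
// computes |v2 - v3| per lane and a plain add folds it into v1, which is why
// the checks match an intrinsic call followed by an add. Scalar model of one
// signed lane (illustrative sketch; saba_lane is a hypothetical helper):
static inline int8_t saba_lane(int8_t acc, int8_t b, int8_t c) {
  int d = (int)b - (int)c;   // widen so the difference cannot overflow
  return (int8_t)(acc + (d < 0 ? -d : d));
}
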
// CHECK-LABEL: define <8 x i8> @test_vaba_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v2, <8 x i8> %v3) #4
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vaba_u8(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i16> @test_vaba_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vaba_u16(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i32> @test_vaba_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vaba_u32(v1, v2, v3);
}

// CHECK-LABEL: define <16 x i8> @test_vabaq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v2, <16 x i8> %v3) #4
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
int8x16_t test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vabaq_s8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vabaq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK: [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> [[VABD_I_I]], <8 x i16> [[VABD1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vabaq_s16(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i32> @test_vabaq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK: [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> [[VABD_I_I]], <4 x i32> [[VABD1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vabaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vabaq_s32(v1, v2, v3);
}

// CHECK-LABEL: define <16 x i8> @test_vabaq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v2, <16 x i8> %v3) #4
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vabaq_u8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vabaq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK: [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> [[VABD_I_I]], <8 x i16> [[VABD1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vabaq_u16(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i32> @test_vabaq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK: [[VABD_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VABD1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> [[VABD_I_I]], <4 x i32> [[VABD1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vabaq_u32(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i8> @test_vabd_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK: ret <8 x i8> [[VABD_I]]
int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) {
  return vabd_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vabd_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]]) #4
// CHECK: ret <4 x i16> [[VABD2_I]]
int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) {
  return vabd_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vabd_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]]) #4
// CHECK: ret <2 x i32> [[VABD2_I]]
int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) {
  return vabd_s32(v1, v2);
}

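// The float vabd variants below (vabd_f32, vabdq_f32, vabdq_f64) lower to
// @llvm.aarch64.neon.fabd rather than an fsub/fabs pair. An equivalent
// expression form using only intrinsics from <arm_neon.h> (illustrative
// sketch; fabd_by_hand_f32 is a hypothetical helper):
static inline float32x2_t fabd_by_hand_f32(float32x2_t a, float32x2_t b) {
  return vabs_f32(vsub_f32(a, b)); // |a - b| per lane, as two instructions
}
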
// CHECK-LABEL: define <8 x i8> @test_vabd_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK: ret <8 x i8> [[VABD_I]]
uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vabd_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vabd_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]]) #4
// CHECK: ret <4 x i16> [[VABD2_I]]
uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vabd_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vabd_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]]) #4
// CHECK: ret <2 x i32> [[VABD2_I]]
uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vabd_u32(v1, v2);
}

// CHECK-LABEL: define <2 x float> @test_vabd_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VABD2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> [[VABD_I]], <2 x float> [[VABD1_I]]) #4
// CHECK: ret <2 x float> [[VABD2_I]]
float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) {
  return vabd_f32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vabdq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK: ret <16 x i8> [[VABD_I]]
int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) {
  return vabdq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vabdq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> [[VABD_I]], <8 x i16> [[VABD1_I]]) #4
// CHECK: ret <8 x i16> [[VABD2_I]]
int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) {
  return vabdq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vabdq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> [[VABD_I]], <4 x i32> [[VABD1_I]]) #4
// CHECK: ret <4 x i32> [[VABD2_I]]
int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) {
  return vabdq_s32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vabdq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK: ret <16 x i8> [[VABD_I]]
uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vabdq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vabdq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> [[VABD_I]], <8 x i16> [[VABD1_I]]) #4
// CHECK: ret <8 x i16> [[VABD2_I]]
uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vabdq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vabdq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> [[VABD_I]], <4 x i32> [[VABD1_I]]) #4
// CHECK: ret <4 x i32> [[VABD2_I]]
uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vabdq_u32(v1, v2);
}

// CHECK-LABEL: define <4 x float> @test_vabdq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VABD2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> [[VABD_I]], <4 x float> [[VABD1_I]]) #4
// CHECK: ret <4 x float> [[VABD2_I]]
float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) {
  return vabdq_f32(v1, v2);
}

// CHECK-LABEL: define <2 x double> @test_vabdq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[VABD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VABD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VABD2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> [[VABD_I]], <2 x double> [[VABD1_I]]) #4
// CHECK: ret <2 x double> [[VABD2_I]]
float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) {
  return vabdq_f64(v1, v2);
}


// CHECK-LABEL: define <8 x i8> @test_vbsl_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
// CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK: ret <8 x i8> [[VBSL2_I]]
int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vbsl_s8(v1, v2, v3);
}

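// vbsl ("bitwise select") has no dedicated IR intrinsic here; it is open-coded
// as (mask & a) | (~mask & b), which is exactly the and/xor/or triple the
// checks match. Scalar model of the semantics (illustrative sketch;
// bsl_by_hand is a hypothetical helper):
static inline uint8_t bsl_by_hand(uint8_t mask, uint8_t a, uint8_t b) {
  // Each result bit comes from a where mask is 1, from b where mask is 0.
  return (uint8_t)((mask & a) | (~mask & b));
}
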
// CHECK-LABEL: define <8 x i8> @test_vbsl_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[TMP4]]
int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return vbsl_s16(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i32> @test_vbsl_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <2 x i32> [[VBSL_I]], <i32 -1, i32 -1>
// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <2 x i32> [[VBSL5_I]]
int32x2_t test_vbsl_s32(uint32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vbsl_s32(v1, v2, v3);
}

// CHECK-LABEL: define <1 x i64> @test_vbsl_s64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], <i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <1 x i64> [[VBSL5_I]]
uint64x1_t test_vbsl_s64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) {
  return vbsl_s64(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i8> @test_vbsl_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
// CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK: ret <8 x i8> [[VBSL2_I]]
uint8x8_t test_vbsl_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vbsl_u8(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i16> @test_vbsl_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <4 x i16> [[VBSL5_I]]
uint16x4_t test_vbsl_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vbsl_u16(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i32> @test_vbsl_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <2 x i32> [[VBSL_I]], <i32 -1, i32 -1>
// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <2 x i32> [[VBSL5_I]]
uint32x2_t test_vbsl_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vbsl_u32(v1, v2, v3);
}

// CHECK-LABEL: define <1 x i64> @test_vbsl_u64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], <i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <1 x i64> [[VBSL5_I]]
uint64x1_t test_vbsl_u64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) {
  return vbsl_u64(v1, v2, v3);
}

// CHECK-LABEL: define <2 x float> @test_vbsl_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <2 x i32>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP4:%.*]] = xor <2 x i32> [[VBSL_I]], <i32 -1, i32 -1>
// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP4]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float>
// CHECK: ret <2 x float> [[TMP5]]
float32x2_t test_vbsl_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vbsl_f32(v1, v2, v3);
}

// CHECK-LABEL: define <1 x double> @test_vbsl_f64(<1 x i64> %v1, <1 x double> %v2, <1 x double> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %v3 to <8 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], <i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[VBSL5_I]] to <1 x double>
// CHECK: ret <1 x double> [[TMP4]]
float64x1_t test_vbsl_f64(uint64x1_t v1, float64x1_t v2, float64x1_t v3) {
  return vbsl_f64(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i8> @test_vbsl_p8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) #0 {
// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
// CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK: ret <8 x i8> [[VBSL2_I]]
poly8x8_t test_vbsl_p8(uint8x8_t v1, poly8x8_t v2, poly8x8_t v3) {
  return vbsl_p8(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i16> @test_vbsl_p16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <4 x i16> [[VBSL5_I]]
poly16x4_t test_vbsl_p16(uint16x4_t v1, poly16x4_t v2, poly16x4_t v3) {
  return vbsl_p16(v1, v2, v3);
}

// CHECK-LABEL: define <16 x i8> @test_vbslq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK: ret <16 x i8> [[VBSL2_I]]
int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vbslq_s8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vbslq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <8 x i16> [[VBSL5_I]]
int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vbslq_s16(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i32> @test_vbslq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <4 x i32> [[VBSL_I]], <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <4 x i32> [[VBSL5_I]]
int32x4_t test_vbslq_s32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vbslq_s32(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i64> @test_vbslq_s64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], <i64 -1, i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <2 x i64> [[VBSL5_I]]
int64x2_t test_vbslq_s64(uint64x2_t v1, int64x2_t v2, int64x2_t v3) {
  return vbslq_s64(v1, v2, v3);
}

// CHECK-LABEL: define <16 x i8> @test_vbslq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK: ret <16 x i8> [[VBSL2_I]]
uint8x16_t test_vbslq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vbslq_u8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vbslq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <8 x i16> [[VBSL5_I]]
uint16x8_t test_vbslq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vbslq_u16(v1, v2, v3);
}

// CHECK-LABEL: define <4 x i32> @test_vbslq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <4 x i32> [[VBSL_I]], <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <4 x i32> [[VBSL5_I]]
uint32x4_t test_vbslq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vbslq_u32(v1, v2, v3);
}

// CHECK-LABEL: define <2 x i64> @test_vbslq_u64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], <i64 -1, i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <2 x i64> [[VBSL5_I]]
uint64x2_t test_vbslq_u64(uint64x2_t v1, uint64x2_t v2, uint64x2_t v3) {
  return vbslq_u64(v1, v2, v3);
}

// CHECK-LABEL: define <4 x float> @test_vbslq_f32(<4 x i32> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <4 x i32> [[VBSL_I]], <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[VBSL5_I]] to <4 x float>
// CHECK: ret <4 x float> [[TMP4]]
float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vbslq_f32(v1, v2, v3);
}

// CHECK-LABEL: define <16 x i8> @test_vbslq_p8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) #0 {
// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK: ret <16 x i8> [[VBSL2_I]]
poly8x16_t test_vbslq_p8(uint8x16_t v1, poly8x16_t v2, poly8x16_t v3) {
  return vbslq_p8(v1, v2, v3);
}

// CHECK-LABEL: define <8 x i16> @test_vbslq_p16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <8 x i16> [[VBSL5_I]]
poly16x8_t test_vbslq_p16(uint16x8_t v1, poly16x8_t v2, poly16x8_t v3) {
  return vbslq_p16(v1, v2, v3);
}

// CHECK-LABEL: define <2 x double> @test_vbslq_f64(<2 x i64> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], <i64 -1, i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[VBSL5_I]] to <2 x double>
// CHECK: ret <2 x double> [[TMP4]]
float64x2_t test_vbslq_f64(uint64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vbslq_f64(v1, v2, v3);
}

// CHECK-LABEL: define <2 x float> @test_vrecps_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[VRECPS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VRECPS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> [[VRECPS_V_I]], <2 x float> [[VRECPS_V1_I]]) #4
// CHECK: [[VRECPS_V3_I:%.*]] = bitcast <2 x float> [[VRECPS_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRECPS_V3_I]] to <2 x float>
// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vrecps_f32(float32x2_t v1, float32x2_t v2) {
  return vrecps_f32(v1, v2);
}

// CHECK-LABEL: define <4 x float> @test_vrecpsq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[VRECPSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VRECPSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> [[VRECPSQ_V_I]], <4 x float> [[VRECPSQ_V1_I]]) #4
// CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRECPSQ_V3_I]] to <4 x float>
// CHECK: ret <4 x float> [[TMP2]]
float32x4_t test_vrecpsq_f32(float32x4_t v1, float32x4_t v2) {
  return vrecpsq_f32(v1, v2);
}

// CHECK-LABEL: define <2 x double> @test_vrecpsq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[VRECPSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VRECPSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VRECPSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> [[VRECPSQ_V_I]], <2 x double> [[VRECPSQ_V1_I]]) #4
// CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <2 x double> [[VRECPSQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRECPSQ_V3_I]] to <2 x double>
// CHECK: ret <2 x double> [[TMP2]]
float64x2_t test_vrecpsq_f64(float64x2_t v1, float64x2_t v2) {
  return vrecpsq_f64(v1, v2);
}

// CHECK-LABEL: define <2 x float> @test_vrsqrts_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[VRSQRTS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VRSQRTS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> [[VRSQRTS_V_I]], <2 x float> [[VRSQRTS_V1_I]]) #4
// CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSQRTS_V3_I]] to <2 x float>
// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vrsqrts_f32(float32x2_t v1, float32x2_t v2) {
  return vrsqrts_f32(v1, v2);
}

// CHECK-LABEL: define <4 x float> @test_vrsqrtsq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[VRSQRTSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VRSQRTSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> [[VRSQRTSQ_V_I]], <4 x float> [[VRSQRTSQ_V1_I]]) #4
// CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSQRTSQ_V3_I]] to <4 x float>
// CHECK: ret <4 x float> [[TMP2]]
float32x4_t test_vrsqrtsq_f32(float32x4_t v1, float32x4_t v2) {
  return vrsqrtsq_f32(v1, v2);
}

// CHECK-LABEL: define <2 x double> @test_vrsqrtsq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[VRSQRTSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VRSQRTSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> [[VRSQRTSQ_V_I]], <2 x double> [[VRSQRTSQ_V1_I]]) #4
// CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <2 x double> [[VRSQRTSQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSQRTSQ_V3_I]] to <2 x double>
// CHECK: ret <2 x double> [[TMP2]]
float64x2_t test_vrsqrtsq_f64(float64x2_t v1, float64x2_t v2) {
  return vrsqrtsq_f64(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vcage_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[VCAGE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCAGE_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> [[VCAGE_V_I]], <2 x float> [[VCAGE_V1_I]]) #4
// CHECK: ret <2 x i32> [[VCAGE_V2_I]]
uint32x2_t test_vcage_f32(float32x2_t v1, float32x2_t v2) {
  return vcage_f32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vcage_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VCAGE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VCAGE_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK: [[VCAGE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> [[VCAGE_V_I]], <1 x double> [[VCAGE_V1_I]]) #4
// CHECK: ret <1 x i64> [[VCAGE_V2_I]]
uint64x1_t test_vcage_f64(float64x1_t a, float64x1_t b) {
  return vcage_f64(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcageq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[VCAGEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCAGEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> [[VCAGEQ_V_I]], <4 x float> [[VCAGEQ_V1_I]]) #4
// CHECK: ret <4 x i32> [[VCAGEQ_V2_I]]
uint32x4_t test_vcageq_f32(float32x4_t v1, float32x4_t v2) {
  return vcageq_f32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcageq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[VCAGEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VCAGEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VCAGEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> [[VCAGEQ_V_I]], <2 x double> [[VCAGEQ_V1_I]]) #4
// CHECK: ret <2 x i64> [[VCAGEQ_V2_I]]
uint64x2_t test_vcageq_f64(float64x2_t v1, float64x2_t v2) {
  return vcageq_f64(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vcagt_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[VCAGT_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCAGT_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> [[VCAGT_V_I]], <2 x float> [[VCAGT_V1_I]]) #4
// CHECK: ret <2 x i32> [[VCAGT_V2_I]]
uint32x2_t test_vcagt_f32(float32x2_t v1, float32x2_t v2) {
  return vcagt_f32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vcagt_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VCAGT_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VCAGT_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK: [[VCAGT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> [[VCAGT_V_I]], <1 x double> [[VCAGT_V1_I]]) #4
// CHECK: ret <1 x i64> [[VCAGT_V2_I]]
uint64x1_t test_vcagt_f64(float64x1_t a, float64x1_t b) {
  return vcagt_f64(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcagtq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[VCAGTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCAGTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> [[VCAGTQ_V_I]], <4 x float> [[VCAGTQ_V1_I]]) #4
// CHECK: ret <4 x i32> [[VCAGTQ_V2_I]]
uint32x4_t test_vcagtq_f32(float32x4_t v1, float32x4_t v2) {
  return vcagtq_f32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcagtq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[VCAGTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VCAGTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VCAGTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> [[VCAGTQ_V_I]], <2 x double> [[VCAGTQ_V1_I]]) #4
// CHECK: ret <2 x i64> [[VCAGTQ_V2_I]]
uint64x2_t test_vcagtq_f64(float64x2_t v1, float64x2_t v2) {
  return vcagtq_f64(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vcale_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[VCALE_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VCALE_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> [[VCALE_V_I]], <2 x float> [[VCALE_V1_I]]) #4
// CHECK: ret <2 x i32> [[VCALE_V2_I]]
uint32x2_t test_vcale_f32(float32x2_t v1, float32x2_t v2) {
  return vcale_f32(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}

// CHECK-LABEL: define <1 x i64> @test_vcale_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VCALE_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK: [[VCALE_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VCALE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> [[VCALE_V_I]], <1 x double> [[VCALE_V1_I]]) #4
// CHECK: ret <1 x i64> [[VCALE_V2_I]]
uint64x1_t test_vcale_f64(float64x1_t a, float64x1_t b) {
  return vcale_f64(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcaleq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[VCALEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VCALEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> [[VCALEQ_V_I]], <4 x float> [[VCALEQ_V1_I]]) #4
// CHECK: ret <4 x i32> [[VCALEQ_V2_I]]
uint32x4_t test_vcaleq_f32(float32x4_t v1, float32x4_t v2) {
  return vcaleq_f32(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}

// CHECK-LABEL: define <2 x i64> @test_vcaleq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[VCALEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VCALEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VCALEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> [[VCALEQ_V_I]], <2 x double> [[VCALEQ_V1_I]]) #4
// CHECK: ret <2 x i64> [[VCALEQ_V2_I]]
uint64x2_t test_vcaleq_f64(float64x2_t v1, float64x2_t v2) {
  return vcaleq_f64(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}

// CHECK-LABEL: define <2 x i32> @test_vcalt_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[VCALT_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VCALT_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> [[VCALT_V_I]], <2 x float> [[VCALT_V1_I]]) #4
// CHECK: ret <2 x i32> [[VCALT_V2_I]]
uint32x2_t test_vcalt_f32(float32x2_t v1, float32x2_t v2) {
  return vcalt_f32(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}

// CHECK-LABEL: define <1 x i64> @test_vcalt_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VCALT_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK: [[VCALT_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VCALT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> [[VCALT_V_I]], <1 x double> [[VCALT_V1_I]]) #4
// CHECK: ret <1 x i64> [[VCALT_V2_I]]
uint64x1_t test_vcalt_f64(float64x1_t a, float64x1_t b) {
  return vcalt_f64(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcaltq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[VCALTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VCALTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> [[VCALTQ_V_I]], <4 x float> [[VCALTQ_V1_I]]) #4
// CHECK: ret <4 x i32> [[VCALTQ_V2_I]]
uint32x4_t test_vcaltq_f32(float32x4_t v1, float32x4_t v2) {
  return vcaltq_f32(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}

// CHECK-LABEL: define <2 x i64> @test_vcaltq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[VCALTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VCALTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VCALTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> [[VCALTQ_V_I]], <2 x double> [[VCALTQ_V1_I]]) #4
// CHECK: ret <2 x i64> [[VCALTQ_V2_I]]
uint64x2_t test_vcaltq_f64(float64x2_t v1, float64x2_t v2) {
  return vcaltq_f64(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}

// CHECK-LABEL: define <8 x i8> @test_vtst_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2
// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_s8(int8x8_t v1, int8x8_t v2) {
  return vtst_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vtst_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
// CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_s16(int16x4_t v1, int16x4_t v2) {
  return vtst_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vtst_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP4:%.*]] = and <2 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i32>
// CHECK: ret <2 x i32> [[VTST_I]]
uint32x2_t test_vtst_s32(int32x2_t v1, int32x2_t v2) {
  return vtst_s32(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vtst_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2
// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_u8(uint8x8_t v1, uint8x8_t v2) {
  return vtst_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vtst_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
// CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_u16(uint16x4_t v1, uint16x4_t v2) {
  return vtst_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vtst_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP4:%.*]] = and <2 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i32>
// CHECK: ret <2 x i32> [[VTST_I]]
uint32x2_t test_vtst_u32(uint32x2_t v1, uint32x2_t v2) {
  return vtst_u32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vtstq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2
// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
// CHECK: ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_s8(int8x16_t v1, int8x16_t v2) {
  return vtstq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vtstq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
// CHECK: ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_s16(int16x8_t v1, int16x8_t v2) {
  return vtstq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vtstq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP4:%.*]] = and <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
// CHECK: ret <4 x i32> [[VTST_I]]
uint32x4_t test_vtstq_s32(int32x4_t v1, int32x4_t v2) {
  return vtstq_s32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vtstq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2
// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
// CHECK: ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vtstq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vtstq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
// CHECK: ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vtstq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vtstq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP4:%.*]] = and <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
// CHECK: ret <4 x i32> [[VTST_I]]
uint32x4_t test_vtstq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vtstq_u32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vtstq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[TMP4:%.*]] = and <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
// CHECK: ret <2 x i64> [[VTST_I]]
uint64x2_t test_vtstq_s64(int64x2_t v1, int64x2_t v2) {
  return vtstq_s64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vtstq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[TMP4:%.*]] = and <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
// CHECK: ret <2 x i64> [[VTST_I]]
uint64x2_t test_vtstq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vtstq_u64(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vtst_p8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2
// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_p8(poly8x8_t v1, poly8x8_t v2) {
  return vtst_p8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vtst_p16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16>
// CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_p16(poly16x4_t v1, poly16x4_t v2) {
  return vtst_p16(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vtstq_p8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2
// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
// CHECK: ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_p8(poly8x16_t v1, poly8x16_t v2) {
  return vtstq_p8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vtstq_p16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
// CHECK: ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_p16(poly16x8_t v1, poly16x8_t v2) {
  return vtstq_p16(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vtst_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[TMP4:%.*]] = and <1 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <1 x i64> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP5]] to <1 x i64>
// CHECK: ret <1 x i64> [[VTST_I]]
uint64x1_t test_vtst_s64(int64x1_t a, int64x1_t b) {
  return vtst_s64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vtst_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[TMP4:%.*]] = and <1 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <1 x i64> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP5]] to <1 x i64>
// CHECK: ret <1 x i64> [[VTST_I]]
uint64x1_t test_vtst_u64(uint64x1_t a, uint64x1_t b) {
  return vtst_u64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vceq_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_s8(int8x8_t v1, int8x8_t v2) {
  return vceq_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vceq_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_s16(int16x4_t v1, int16x4_t v2) {
  return vceq_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vceq_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_s32(int32x2_t v1, int32x2_t v2) {
  return vceq_s32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vceq_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_s64(int64x1_t a, int64x1_t b) {
  return vceq_s64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vceq_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_u64(uint64x1_t a, uint64x1_t b) {
  return vceq_u64(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vceq_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_f32(float32x2_t v1, float32x2_t v2) {
  return vceq_f32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vceq_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp oeq <1 x double> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_f64(float64x1_t a, float64x1_t b) {
  return vceq_f64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vceq_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_u8(uint8x8_t v1, uint8x8_t v2) {
  return vceq_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vceq_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_u16(uint16x4_t v1, uint16x4_t v2) {
  return vceq_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vceq_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_u32(uint32x2_t v1, uint32x2_t v2) {
  return vceq_u32(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vceq_p8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_p8(poly8x8_t v1, poly8x8_t v2) {
  return vceq_p8(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vceqq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_s8(int8x16_t v1, int8x16_t v2) {
  return vceqq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vceqq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vceqq_s16(int16x8_t v1, int16x8_t v2) {
  return vceqq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vceqq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_s32(int32x4_t v1, int32x4_t v2) {
  return vceqq_s32(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vceqq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp oeq <4 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_f32(float32x4_t v1, float32x4_t v2) {
  return vceqq_f32(v1, v2);
}
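
// All of the vceq/vceqq checks above share one lowering shape: an element-wise
// icmp eq (fcmp oeq for floating point) producing <N x i1>, sign-extended so a
// true lane becomes all ones and a false lane all zeros. A minimal sketch of
// that semantics for one lane width; emulate_vceq_u32 is a hypothetical helper
// added here for illustration only and is not covered by any CHECK line.
static inline uint32x2_t emulate_vceq_u32(uint32x2_t a, uint32x2_t b) {
  uint32_t lane0 = vget_lane_u32(a, 0) == vget_lane_u32(b, 0) ? 0xFFFFFFFFu : 0u;
  uint32_t lane1 = vget_lane_u32(a, 1) == vget_lane_u32(b, 1) ? 0xFFFFFFFFu : 0u;
  uint32x2_t r = vdup_n_u32(lane0);   // splat the lane-0 mask
  return vset_lane_u32(lane1, r, 1);  // overwrite lane 1; equals vceq_u32(a, b)
}
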
// CHECK-LABEL: define <16 x i8> @test_vceqq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vceqq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vceqq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vceqq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vceqq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vceqq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vceqq_u32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vceqq_p8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_p8(poly8x16_t v1, poly8x16_t v2) {
  return vceqq_p8(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vceqq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vceqq_s64(int64x2_t v1, int64x2_t v2) {
  return vceqq_s64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vceqq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vceqq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vceqq_u64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vceqq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x double> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vceqq_f64(float64x2_t v1, float64x2_t v2) {
  return vceqq_f64(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vcge_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sge <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcge_s8(int8x8_t v1, int8x8_t v2) {
  return vcge_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vcge_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sge <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcge_s16(int16x4_t v1, int16x4_t v2) {
  return vcge_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vcge_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sge <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_s32(int32x2_t v1, int32x2_t v2) {
  return vcge_s32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vcge_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sge <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcge_s64(int64x1_t a, int64x1_t b) {
  return vcge_s64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vcge_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp uge <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcge_u64(uint64x1_t a, uint64x1_t b) {
  return vcge_u64(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vcge_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp oge <2 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_f32(float32x2_t v1, float32x2_t v2) {
  return vcge_f32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vcge_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp oge <1 x double> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcge_f64(float64x1_t a, float64x1_t b) {
  return vcge_f64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vcge_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp uge <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcge_u8(uint8x8_t v1, uint8x8_t v2) {
  return vcge_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vcge_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp uge <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcge_u16(uint16x4_t v1, uint16x4_t v2) {
  return vcge_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vcge_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp uge <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_u32(uint32x2_t v1, uint32x2_t v2) {
  return vcge_u32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vcgeq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sge <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgeq_s8(int8x16_t v1, int8x16_t v2) {
  return vcgeq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vcgeq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sge <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgeq_s16(int16x8_t v1, int16x8_t v2) {
  return vcgeq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcgeq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sge <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_s32(int32x4_t v1, int32x4_t v2) {
  return vcgeq_s32(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcgeq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp oge <4 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_f32(float32x4_t v1, float32x4_t v2) {
  return vcgeq_f32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vcgeq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp uge <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgeq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcgeq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vcgeq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp uge <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgeq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcgeq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcgeq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp uge <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcgeq_u32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcgeq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sge <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgeq_s64(int64x2_t v1, int64x2_t v2) {
  return vcgeq_s64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcgeq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp uge <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgeq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcgeq_u64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcgeq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp oge <2 x double> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgeq_f64(float64x2_t v1, float64x2_t v2) {
  return vcgeq_f64(v1, v2);
}

// Notes about vcle:
// The LE condition predicate is implemented as GE, so check for reversed operands.
// Using registers other than v0, v1 is possible, but would be odd.
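
// The swap is easy to see in C: vcle with the arguments exchanged computes
// the same lane mask as vcge. A minimal sketch; check_vcle_as_vcge is a
// hypothetical helper added here for illustration only and is not covered by
// any CHECK line.
static inline uint32x2_t check_vcle_as_vcge(float32x2_t a, float32x2_t b) {
  uint32x2_t le = vcle_f32(a, b); // selected as FCMGE with operands swapped
  uint32x2_t ge = vcge_f32(b, a); // the same compare, written as GE
  return veor_u32(le, ge);        // identical masks, so this is all zeros
}
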
// CHECK-LABEL: define <8 x i8> @test_vcle_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sle <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcle_s8(int8x8_t v1, int8x8_t v2) {
  return vcle_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vcle_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sle <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_s16(int16x4_t v1, int16x4_t v2) {
  return vcle_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vcle_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sle <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_s32(int32x2_t v1, int32x2_t v2) {
  return vcle_s32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vcle_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sle <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_s64(int64x1_t a, int64x1_t b) {
  return vcle_s64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vcle_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ule <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_u64(uint64x1_t a, uint64x1_t b) {
  return vcle_u64(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vcle_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp ole <2 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_f32(float32x2_t v1, float32x2_t v2) {
  return vcle_f32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vcle_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp ole <1 x double> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_f64(float64x1_t a, float64x1_t b) {
  return vcle_f64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vcle_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ule <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcle_u8(uint8x8_t v1, uint8x8_t v2) {
  return vcle_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vcle_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ule <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_u16(uint16x4_t v1, uint16x4_t v2) {
  return vcle_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vcle_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ule <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_u32(uint32x2_t v1, uint32x2_t v2) {
  return vcle_u32(v1, v2);
// CHECK-LABEL: define <16 x i8> @test_vcleq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sle <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_s8(int8x16_t v1, int8x16_t v2) {
  return vcleq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vcleq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sle <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_s16(int16x8_t v1, int16x8_t v2) {
  return vcleq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcleq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sle <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_s32(int32x4_t v1, int32x4_t v2) {
  return vcleq_s32(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcleq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp ole <4 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_f32(float32x4_t v1, float32x4_t v2) {
  return vcleq_f32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vcleq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ule <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcleq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vcleq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ule <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcleq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcleq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ule <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcleq_u32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcleq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sle <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_s64(int64x2_t v1, int64x2_t v2) {
  return vcleq_s64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcleq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ule <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcleq_u64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcleq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp ole <2 x double> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_f64(float64x2_t v1, float64x2_t v2) {
  return vcleq_f64(v1, v2);
}


// CHECK-LABEL: define <8 x i8> @test_vcgt_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_s8(int8x8_t v1, int8x8_t v2) {
  return vcgt_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vcgt_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_s16(int16x4_t v1, int16x4_t v2) {
  return vcgt_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vcgt_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_s32(int32x2_t v1, int32x2_t v2) {
  return vcgt_s32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vcgt_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sgt <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcgt_s64(int64x1_t a, int64x1_t b) {
  return vcgt_s64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vcgt_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ugt <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcgt_u64(uint64x1_t a, uint64x1_t b) {
  return vcgt_u64(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vcgt_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_f32(float32x2_t v1, float32x2_t v2) {
  return vcgt_f32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vcgt_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp ogt <1 x double> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcgt_f64(float64x1_t a, float64x1_t b) {
  return vcgt_f64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vcgt_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_u8(uint8x8_t v1, uint8x8_t v2) {
  return vcgt_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vcgt_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_u16(uint16x4_t v1, uint16x4_t v2) {
  return vcgt_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vcgt_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_u32(uint32x2_t v1, uint32x2_t v2) {
  return vcgt_u32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vcgtq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sgt <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_s8(int8x16_t v1, int8x16_t v2) {
  return vcgtq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vcgtq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_s16(int16x8_t v1, int16x8_t v2) {
  return vcgtq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcgtq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_s32(int32x4_t v1, int32x4_t v2) {
  return vcgtq_s32(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcgtq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp ogt <4 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_f32(float32x4_t v1, float32x4_t v2) {
  return vcgtq_f32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vcgtq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ugt <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcgtq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vcgtq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcgtq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcgtq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcgtq_u32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcgtq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_s64(int64x2_t v1, int64x2_t v2) {
  return vcgtq_s64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcgtq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcgtq_u64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcgtq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x double> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_f64(float64x2_t v1, float64x2_t v2) {
  return vcgtq_f64(v1, v2);
}


// Notes about vclt:
// The LT condition predicate is implemented as GT with reversed operands,
// so check for reversed operands.
// Using registers other than v0 and v1 is possible, but would be odd.
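
// Illustration (editor's sketch, not a checked test): the floating-point
// comparisons lower to *ordered* fcmp predicates (olt, ole, ogt, oge), so a
// lane containing NaN always compares false and yields an all-zero mask.
static inline uint32x2_t vclt_nan_lane_compares_false(void) {
  // Lane 0 holds NaN, lane 1 holds 1.0f (__builtin_nanf is a clang builtin).
  float32x2_t v = vset_lane_f32(1.0f, vdup_n_f32(__builtin_nanf("")), 1);
  // NaN < 2.0f is false under olt, so the result is { 0, 0xffffffff }.
  return vclt_f32(v, vdup_n_f32(2.0f));
}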
// CHECK-LABEL: define <8 x i8> @test_vclt_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp slt <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_s8(int8x8_t v1, int8x8_t v2) {
  return vclt_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vclt_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp slt <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_s16(int16x4_t v1, int16x4_t v2) {
  return vclt_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vclt_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp slt <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_s32(int32x2_t v1, int32x2_t v2) {
  return vclt_s32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vclt_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp slt <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_s64(int64x1_t a, int64x1_t b) {
  return vclt_s64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vclt_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ult <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_u64(uint64x1_t a, uint64x1_t b) {
  return vclt_u64(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vclt_f32(<2 x float> %v1, <2 x float> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp olt <2 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_f32(float32x2_t v1, float32x2_t v2) {
  return vclt_f32(v1, v2);
}

// CHECK-LABEL: define <1 x i64> @test_vclt_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp olt <1 x double> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_f64(float64x1_t a, float64x1_t b) {
  return vclt_f64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vclt_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ult <8 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_u8(uint8x8_t v1, uint8x8_t v2) {
  return vclt_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vclt_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ult <4 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_u16(uint16x4_t v1, uint16x4_t v2) {
  return vclt_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vclt_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ult <2 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_u32(uint32x2_t v1, uint32x2_t v2) {
  return vclt_u32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vcltq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp slt <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_s8(int8x16_t v1, int8x16_t v2) {
  return vcltq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vcltq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp slt <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_s16(int16x8_t v1, int16x8_t v2) {
  return vcltq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcltq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp slt <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_s32(int32x4_t v1, int32x4_t v2) {
  return vcltq_s32(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcltq_f32(<4 x float> %v1, <4 x float> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp olt <4 x float> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_f32(float32x4_t v1, float32x4_t v2) {
  return vcltq_f32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vcltq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ult <16 x i8> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcltq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vcltq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ult <8 x i16> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcltq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vcltq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ult <4 x i32> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcltq_u32(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcltq_s64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp slt <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_s64(int64x2_t v1, int64x2_t v2) {
  return vcltq_s64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcltq_u64(<2 x i64> %v1, <2 x i64> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = icmp ult <2 x i64> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcltq_u64(v1, v2);
}

// CHECK-LABEL: define <2 x i64> @test_vcltq_f64(<2 x double> %v1, <2 x double> %v2) #0 {
// CHECK: [[CMP_I:%.*]] = fcmp olt <2 x double> %v1, %v2
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_f64(float64x2_t v1, float64x2_t v2) {
  return vcltq_f64(v1, v2);
}

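// Illustration (editor's sketch, not a checked test): vhadd is the halving
// add, (a + b) >> 1 per lane, computed without losing the carry bit of the
// widened intermediate sum. A scalar model of one s8 lane:
static inline int8_t shadd_lane_model(int8_t a, int8_t b) {
  // Widen first so the 9-bit sum is exact; the arithmetic right shift then
  // matches the signed SHADD instruction (truncating toward minus infinity).
  return (int8_t)(((int16_t)a + (int16_t)b) >> 1);
}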

// CHECK-LABEL: define <8 x i8> @test_vhadd_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK: ret <8 x i8> [[VHADD_V_I]]
int8x8_t test_vhadd_s8(int8x8_t v1, int8x8_t v2) {
  return vhadd_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vhadd_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]]) #4
// CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vhadd_s16(int16x4_t v1, int16x4_t v2) {
  return vhadd_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vhadd_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]]) #4
// CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vhadd_s32(int32x2_t v1, int32x2_t v2) {
  return vhadd_s32(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vhadd_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK: ret <8 x i8> [[VHADD_V_I]]
uint8x8_t test_vhadd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vhadd_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vhadd_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]]) #4
// CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vhadd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vhadd_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vhadd_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]]) #4
// CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vhadd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vhadd_u32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vhaddq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK: ret <16 x i8> [[VHADDQ_V_I]]
int8x16_t test_vhaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vhaddq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vhaddq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]]) #4
// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vhaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vhaddq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vhaddq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]]) #4
// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vhaddq_s32(int32x4_t v1, int32x4_t v2) {
  return vhaddq_s32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vhaddq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK: ret <16 x i8> [[VHADDQ_V_I]]
uint8x16_t test_vhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vhaddq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vhaddq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]]) #4
// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vhaddq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vhaddq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]]) #4
// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vhaddq_u32(v1, v2);
}

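// Illustration (editor's sketch, not a checked test): vhsub is the halving
// subtract, (a - b) >> 1 per lane, with no rounding increment. A scalar
// model of one u8 lane:
static inline uint8_t uhsub_lane_model(uint8_t a, uint8_t b) {
  // The widened difference can be negative, so shift it as a signed value
  // before truncating back to the lane width, as UHSUB does.
  return (uint8_t)(((int16_t)a - (int16_t)b) >> 1);
}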

// CHECK-LABEL: define <8 x i8> @test_vhsub_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK: ret <8 x i8> [[VHSUB_V_I]]
int8x8_t test_vhsub_s8(int8x8_t v1, int8x8_t v2) {
  return vhsub_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vhsub_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16> [[VHSUB_V_I]], <4 x i16> [[VHSUB_V1_I]]) #4
// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vhsub_s16(int16x4_t v1, int16x4_t v2) {
  return vhsub_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vhsub_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> [[VHSUB_V_I]], <2 x i32> [[VHSUB_V1_I]]) #4
// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vhsub_s32(int32x2_t v1, int32x2_t v2) {
  return vhsub_s32(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vhsub_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK: ret <8 x i8> [[VHSUB_V_I]]
uint8x8_t test_vhsub_u8(uint8x8_t v1, uint8x8_t v2) {
  return vhsub_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vhsub_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16> [[VHSUB_V_I]], <4 x i16> [[VHSUB_V1_I]]) #4
// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vhsub_u16(uint16x4_t v1, uint16x4_t v2) {
  return vhsub_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vhsub_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32> [[VHSUB_V_I]], <2 x i32> [[VHSUB_V1_I]]) #4
// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vhsub_u32(uint32x2_t v1, uint32x2_t v2) {
  return vhsub_u32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vhsubq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
int8x16_t test_vhsubq_s8(int8x16_t v1, int8x16_t v2) {
  return vhsubq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vhsubq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16> [[VHSUBQ_V_I]], <8 x i16> [[VHSUBQ_V1_I]]) #4
// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vhsubq_s16(int16x8_t v1, int16x8_t v2) {
  return vhsubq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vhsubq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32> [[VHSUBQ_V_I]], <4 x i32> [[VHSUBQ_V1_I]]) #4
// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vhsubq_s32(int32x4_t v1, int32x4_t v2) {
  return vhsubq_s32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vhsubq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
uint8x16_t test_vhsubq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vhsubq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vhsubq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16> [[VHSUBQ_V_I]], <8 x i16> [[VHSUBQ_V1_I]]) #4
// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vhsubq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vhsubq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vhsubq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32> [[VHSUBQ_V_I]], <4 x i32> [[VHSUBQ_V1_I]]) #4
// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vhsubq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vhsubq_u32(v1, v2);
}

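// Illustration (editor's sketch, not a checked test): vrhadd is the
// rounding halving add, (a + b + 1) >> 1 per lane; the extra +1 rounds the
// half-sum upward instead of truncating like vhadd. A scalar model:
static inline int8_t srhadd_lane_model(int8_t a, int8_t b) {
  // The +1 before the shift implements round-half-up on the exact 9-bit sum.
  return (int8_t)(((int16_t)a + (int16_t)b + 1) >> 1);
}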

// CHECK-LABEL: define <8 x i8> @test_vrhadd_s8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK: ret <8 x i8> [[VRHADD_V_I]]
int8x8_t test_vrhadd_s8(int8x8_t v1, int8x8_t v2) {
  return vrhadd_s8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vrhadd_s16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]]) #4
// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vrhadd_s16(int16x4_t v1, int16x4_t v2) {
  return vrhadd_s16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vrhadd_s32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]]) #4
// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vrhadd_s32(int32x2_t v1, int32x2_t v2) {
  return vrhadd_s32(v1, v2);
}

// CHECK-LABEL: define <8 x i8> @test_vrhadd_u8(<8 x i8> %v1, <8 x i8> %v2) #0 {
// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) #4
// CHECK: ret <8 x i8> [[VRHADD_V_I]]
uint8x8_t test_vrhadd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vrhadd_u8(v1, v2);
}

// CHECK-LABEL: define <4 x i16> @test_vrhadd_u16(<4 x i16> %v1, <4 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]]) #4
// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vrhadd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vrhadd_u16(v1, v2);
}

// CHECK-LABEL: define <2 x i32> @test_vrhadd_u32(<2 x i32> %v1, <2 x i32> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]]) #4
// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vrhadd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vrhadd_u32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vrhaddq_s8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
int8x16_t test_vrhaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vrhaddq_s8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vrhaddq_s16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]]) #4
// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vrhaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vrhaddq_s16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vrhaddq_s32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]]) #4
// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vrhaddq_s32(int32x4_t v1, int32x4_t v2) {
  return vrhaddq_s32(v1, v2);
}

// CHECK-LABEL: define <16 x i8> @test_vrhaddq_u8(<16 x i8> %v1, <16 x i8> %v2) #0 {
// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) #4
// CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
uint8x16_t test_vrhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vrhaddq_u8(v1, v2);
}

// CHECK-LABEL: define <8 x i16> @test_vrhaddq_u16(<8 x i16> %v1, <8 x i16> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]]) #4
// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vrhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vrhaddq_u16(v1, v2);
}

// CHECK-LABEL: define <4 x i32> @test_vrhaddq_u32(<4 x i32> %v1, <4 x i32> %v2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]]) #4
// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vrhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vrhaddq_u32(v1, v2);
}

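// Illustration (editor's sketch, not a checked test): vqadd is a saturating
// add; a sum that would overflow the lane clamps to the type's limit instead
// of wrapping. A scalar model of one s8 lane:
static inline int8_t sqadd_lane_model(int8_t a, int8_t b) {
  int16_t s = (int16_t)a + (int16_t)b; // exact 9-bit intermediate sum
  if (s > 127) return 127;             // clamp on positive overflow
  if (s < -128) return -128;           // clamp on negative overflow
  return (int8_t)s;
}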

// CHECK-LABEL: define <8 x i8> @test_vqadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQADD_V_I]]
int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) {
  return vqadd_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[VQADD_V_I]], <4 x i16> [[VQADD_V1_I]]) #4
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) {
  return vqadd_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> [[VQADD_V_I]], <2 x i32> [[VQADD_V1_I]]) #4
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) {
  return vqadd_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqadd_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64> [[VQADD_V_I]], <1 x i64> [[VQADD_V1_I]]) #4
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) {
  return vqadd_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vqadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQADD_V_I]]
uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) {
  return vqadd_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[VQADD_V_I]], <4 x i16> [[VQADD_V1_I]]) #4
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) {
  return vqadd_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> [[VQADD_V_I]], <2 x i32> [[VQADD_V1_I]]) #4
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) {
  return vqadd_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqadd_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64> [[VQADD_V_I]], <1 x i64> [[VQADD_V1_I]]) #4
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) {
  return vqadd_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VQADDQ_V_I]]
int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) {
  return vqaddq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> [[VQADDQ_V_I]], <8 x i16> [[VQADDQ_V1_I]]) #4
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) {
  return vqaddq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQADDQ_V_I]], <4 x i32> [[VQADDQ_V1_I]]) #4
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) {
  return vqaddq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQADDQ_V_I]], <2 x i64> [[VQADDQ_V1_I]]) #4
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) {
  return vqaddq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VQADDQ_V_I]]
uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vqaddq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> [[VQADDQ_V_I]], <8 x i16> [[VQADDQ_V1_I]]) #4
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vqaddq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> [[VQADDQ_V_I]], <4 x i32> [[VQADDQ_V1_I]]) #4
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vqaddq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> [[VQADDQ_V_I]], <2 x i64> [[VQADDQ_V1_I]]) #4
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vqaddq_u64(a, b);
}

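// Illustration (editor's sketch, not a checked test): vqsub saturates the
// difference the same way vqadd saturates the sum; for unsigned lanes the
// result floors at zero rather than wrapping around. A scalar model:
static inline uint8_t uqsub_lane_model(uint8_t a, uint8_t b) {
  // 5 - 7 would wrap to 254 in plain modular arithmetic; UQSUB clamps to 0.
  return (a > b) ? (uint8_t)(a - b) : 0;
}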

// CHECK-LABEL: define <8 x i8> @test_vqsub_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQSUB_V_I]]
int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) {
  return vqsub_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqsub_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[VQSUB_V_I]], <4 x i16> [[VQSUB_V1_I]]) #4
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) {
  return vqsub_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqsub_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> [[VQSUB_V_I]], <2 x i32> [[VQSUB_V1_I]]) #4
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) {
  return vqsub_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqsub_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64> [[VQSUB_V_I]], <1 x i64> [[VQSUB_V1_I]]) #4
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) {
  return vqsub_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vqsub_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQSUB_V_I]]
uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) {
  return vqsub_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqsub_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[VQSUB_V_I]], <4 x i16> [[VQSUB_V1_I]]) #4
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) {
  return vqsub_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqsub_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> [[VQSUB_V_I]], <2 x i32> [[VQSUB_V1_I]]) #4
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) {
  return vqsub_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqsub_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64> [[VQSUB_V_I]], <1 x i64> [[VQSUB_V1_I]]) #4
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) {
  return vqsub_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqsubq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) {
  return vqsubq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqsubq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> [[VQSUBQ_V_I]], <8 x i16> [[VQSUBQ_V1_I]]) #4
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) {
  return vqsubq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqsubq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQSUBQ_V_I]], <4 x i32> [[VQSUBQ_V1_I]]) #4
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) {
  return vqsubq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqsubq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
[[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 3402 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 3403 // CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 3404 // CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 3405 // CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQSUBQ_V_I]], <2 x i64> [[VQSUBQ_V1_I]]) #4 3406 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8> 3407 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <2 x i64> 3408 // CHECK: ret <2 x i64> [[TMP2]] 3409 int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) { 3410 return vqsubq_s64(a, b); 3411 } 3412 3413 // CHECK-LABEL: define <16 x i8> @test_vqsubq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 3414 // CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %a, <16 x i8> %b) #4 3415 // CHECK: ret <16 x i8> [[VQSUBQ_V_I]] 3416 uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) { 3417 return vqsubq_u8(a, b); 3418 } 3419 3420 // CHECK-LABEL: define <8 x i16> @test_vqsubq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 3421 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3422 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3423 // CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3424 // CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 3425 // CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> [[VQSUBQ_V_I]], <8 x i16> [[VQSUBQ_V1_I]]) #4 3426 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8> 3427 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <8 x i16> 3428 // CHECK: ret <8 x i16> [[TMP2]] 3429 uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) { 3430 return vqsubq_u16(a, b); 3431 } 3432 3433 // CHECK-LABEL: define <4 x i32> @test_vqsubq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 3434 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3435 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3436 // CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3437 // CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 3438 // CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> [[VQSUBQ_V_I]], <4 x i32> [[VQSUBQ_V1_I]]) #4 3439 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8> 3440 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <4 x i32> 3441 // CHECK: ret <4 x i32> [[TMP2]] 3442 uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) { 3443 return vqsubq_u32(a, b); 3444 } 3445 3446 // CHECK-LABEL: define <2 x i64> @test_vqsubq_u64(<2 x i64> %a, <2 x i64> %b) #0 { 3447 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 3448 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 3449 // CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 3450 // CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 3451 // CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> [[VQSUBQ_V_I]], <2 x i64> [[VQSUBQ_V1_I]]) #4 3452 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8> 3453 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <2 x i64> 3454 // CHECK: ret <2 x i64> [[TMP2]] 3455 uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) { 3456 return vqsubq_u64(a, b); 3457 } 3458 3459 3460 // CHECK-LABEL: define <8 x i8> @test_vshl_s8(<8 x i8> %a, 
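
// vshl/vshlq lower to the aarch64.neon.sshl/ushl intrinsics. Each lane of the
// first operand is shifted by the signed per-lane count in the second operand;
// negative counts shift right. Note that the unsigned variants still take a
// signed shift-count vector (e.g. vshl_u8(uint8x8_t, int8x8_t)).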

// CHECK-LABEL: define <8 x i8> @test_vshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VSHL_V_I]]
int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) {
  return vshl_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sshl.v4i16(<4 x i16> [[VSHL_V_I]], <4 x i16> [[VSHL_V1_I]]) #4
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) {
  return vshl_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sshl.v2i32(<2 x i32> [[VSHL_V_I]], <2 x i32> [[VSHL_V1_I]]) #4
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) {
  return vshl_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> [[VSHL_V_I]], <1 x i64> [[VSHL_V1_I]]) #4
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) {
  return vshl_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.ushl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VSHL_V_I]]
uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) {
  return vshl_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.ushl.v4i16(<4 x i16> [[VSHL_V_I]], <4 x i16> [[VSHL_V1_I]]) #4
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) {
  return vshl_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ushl.v2i32(<2 x i32> [[VSHL_V_I]], <2 x i32> [[VSHL_V1_I]]) #4
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) {
  return vshl_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> [[VSHL_V_I]], <1 x i64> [[VSHL_V1_I]]) #4
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) {
  return vshl_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VSHLQ_V_I]]
int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) {
  return vshlq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> [[VSHLQ_V_I]], <8 x i16> [[VSHLQ_V1_I]]) #4
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) {
  return vshlq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[VSHLQ_V_I]], <4 x i32> [[VSHLQ_V1_I]]) #4
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) {
  return vshlq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[VSHLQ_V_I]], <2 x i64> [[VSHLQ_V1_I]]) #4
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) {
  return vshlq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VSHLQ_V_I]]
uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) {
  return vshlq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[VSHLQ_V_I]], <8 x i16> [[VSHLQ_V1_I]]) #4
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) {
  return vshlq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[VSHLQ_V_I]], <4 x i32> [[VSHLQ_V1_I]]) #4
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) {
  return vshlq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[VSHLQ_V_I]], <2 x i64> [[VSHLQ_V1_I]]) #4
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
  return vshlq_u64(a, b);
}
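
// vqshl/vqshlq are the saturating variants (aarch64.neon.sqshl/uqshl):
// results that would overflow the element range saturate instead of wrapping.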

// CHECK-LABEL: define <8 x i8> @test_vqshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQSHL_V_I]]
int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) {
  return vqshl_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_V_I]], <4 x i16> [[VQSHL_V1_I]]) #4
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) {
  return vqshl_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_V_I]], <2 x i32> [[VQSHL_V1_I]]) #4
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) {
  return vqshl_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_V_I]], <1 x i64> [[VQSHL_V1_I]]) #4
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) {
  return vqshl_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vqshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQSHL_V_I]]
uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) {
  return vqshl_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_V_I]], <4 x i16> [[VQSHL_V1_I]]) #4
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) {
  return vqshl_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_V_I]], <2 x i32> [[VQSHL_V1_I]]) #4
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) {
  return vqshl_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_V_I]], <1 x i64> [[VQSHL_V1_I]]) #4
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) {
  return vqshl_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) {
  return vqshlq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHLQ_V_I]], <8 x i16> [[VQSHLQ_V1_I]]) #4
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) {
  return vqshlq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHLQ_V_I]], <4 x i32> [[VQSHLQ_V1_I]]) #4
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) {
  return vqshlq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VQSHLQ_V_I]], <2 x i64> [[VQSHLQ_V1_I]]) #4
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) {
  return vqshlq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqshlq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHLQ_V_I]], <8 x i16> [[VQSHLQ_V1_I]]) #4
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqshlq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[VQSHLQ_V_I]], <4 x i32> [[VQSHLQ_V1_I]]) #4
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqshlq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHLQ_V_I]], <2 x i64> [[VQSHLQ_V1_I]]) #4
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqshlq_u64(a, b);
}
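
// vrshl/vrshlq are the rounding variants (aarch64.neon.srshl/urshl): lanes
// shifted right by a negative count are rounded rather than truncated.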

// CHECK-LABEL: define <8 x i8> @test_vrshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VRSHL_V_I]]
int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
  return vrshl_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vrshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHL_V_I]], <4 x i16> [[VRSHL_V1_I]]) #4
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
  return vrshl_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vrshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHL_V_I]], <2 x i32> [[VRSHL_V1_I]]) #4
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
  return vrshl_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vrshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHL_V_I]], <1 x i64> [[VRSHL_V1_I]]) #4
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
  return vrshl_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vrshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VRSHL_V_I]]
uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
  return vrshl_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vrshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHL_V_I]], <4 x i16> [[VRSHL_V1_I]]) #4
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
  return vrshl_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vrshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHL_V_I]], <2 x i32> [[VRSHL_V1_I]]) #4
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
  return vrshl_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vrshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHL_V_I]], <1 x i64> [[VRSHL_V1_I]]) #4
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
  return vrshl_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vrshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
  return vrshlq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vrshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHLQ_V_I]], <8 x i16> [[VRSHLQ_V1_I]]) #4
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
  return vrshlq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vrshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHLQ_V_I]], <4 x i32> [[VRSHLQ_V1_I]]) #4
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
  return vrshlq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vrshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHLQ_V_I]], <2 x i64> [[VRSHLQ_V1_I]]) #4
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
  return vrshlq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vrshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vrshlq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vrshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHLQ_V_I]], <8 x i16> [[VRSHLQ_V1_I]]) #4
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vrshlq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vrshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHLQ_V_I]], <4 x i32> [[VRSHLQ_V1_I]]) #4
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vrshlq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vrshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHLQ_V_I]], <2 x i64> [[VRSHLQ_V1_I]]) #4
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vrshlq_u64(a, b);
}
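
// vqrshl/vqrshlq combine both behaviours (aarch64.neon.sqrshl/uqrshl):
// saturating left shifts and rounding right shifts.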

// CHECK-LABEL: define <8 x i8> @test_vqrshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQRSHL_V_I]]
int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) {
  return vqrshl_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqrshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[VQRSHL_V_I]], <4 x i16> [[VQRSHL_V1_I]]) #4
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) {
  return vqrshl_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqrshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> [[VQRSHL_V_I]], <2 x i32> [[VQRSHL_V1_I]]) #4
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) {
  return vqrshl_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqrshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[VQRSHL_V_I]], <1 x i64> [[VQRSHL_V1_I]]) #4
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) {
  return vqrshl_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vqrshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQRSHL_V_I]]
uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) {
  return vqrshl_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqrshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[VQRSHL_V_I]], <4 x i16> [[VQRSHL_V1_I]]) #4
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) {
  return vqrshl_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqrshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> [[VQRSHL_V_I]], <2 x i32> [[VQRSHL_V1_I]]) #4
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) {
  return vqrshl_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqrshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[VQRSHL_V_I]], <1 x i64> [[VQRSHL_V1_I]]) #4
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) {
  return vqrshl_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqrshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) {
  return vqrshlq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqrshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> [[VQRSHLQ_V_I]], <8 x i16> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) {
  return vqrshlq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqrshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> [[VQRSHLQ_V_I]], <4 x i32> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) {
  return vqrshlq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqrshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> [[VQRSHLQ_V_I]], <2 x i64> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) {
  return vqrshlq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqrshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqrshlq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqrshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> [[VQRSHLQ_V_I]], <8 x i16> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqrshlq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqrshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> [[VQRSHLQ_V_I]], <4 x i32> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqrshlq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqrshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> [[VQRSHLQ_V_I]], <2 x i64> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqrshlq_u64(a, b);
}
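
// vsli_n/vsliq_n (shift left and insert) lower to aarch64.neon.vsli with the
// immediate shift amount as a third operand; these poly64 tests use 0.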

// CHECK-LABEL: define <1 x i64> @test_vsli_n_p64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 0)
// CHECK: ret <1 x i64> [[VSLI_N2]]
poly64x1_t test_vsli_n_p64(poly64x1_t a, poly64x1_t b) {
  return vsli_n_p64(a, b, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vsliq_n_p64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 0)
// CHECK: ret <2 x i64> [[VSLI_N2]]
poly64x2_t test_vsliq_n_p64(poly64x2_t a, poly64x2_t b) {
  return vsliq_n_p64(a, b, 0);
}
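
// vmax/vmaxq select the per-lane maximum via aarch64.neon.smax/umax/fmax.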

// CHECK-LABEL: define <8 x i8> @test_vmax_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VMAX_I]]
int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) {
  return vmax_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmax_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> [[VMAX_I]], <4 x i16> [[VMAX1_I]]) #4
// CHECK: ret <4 x i16> [[VMAX2_I]]
int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) {
  return vmax_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmax_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> [[VMAX_I]], <2 x i32> [[VMAX1_I]]) #4
// CHECK: ret <2 x i32> [[VMAX2_I]]
int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) {
  return vmax_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vmax_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VMAX_I]]
uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) {
  return vmax_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmax_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> [[VMAX_I]], <4 x i16> [[VMAX1_I]]) #4
// CHECK: ret <4 x i16> [[VMAX2_I]]
uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) {
  return vmax_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmax_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> [[VMAX_I]], <2 x i32> [[VMAX1_I]]) #4
// CHECK: ret <2 x i32> [[VMAX2_I]]
uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) {
  return vmax_u32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vmax_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> [[VMAX_I]], <2 x float> [[VMAX1_I]]) #4
// CHECK: ret <2 x float> [[VMAX2_I]]
float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) {
  return vmax_f32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vmaxq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VMAX_I]]
int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) {
  return vmaxq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vmaxq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> [[VMAX_I]], <8 x i16> [[VMAX1_I]]) #4
// CHECK: ret <8 x i16> [[VMAX2_I]]
int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) {
  return vmaxq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vmaxq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> [[VMAX_I]], <4 x i32> [[VMAX1_I]]) #4
// CHECK: ret <4 x i32> [[VMAX2_I]]
int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) {
  return vmaxq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vmaxq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VMAX_I]]
uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) {
  return vmaxq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vmaxq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> [[VMAX_I]], <8 x i16> [[VMAX1_I]]) #4
// CHECK: ret <8 x i16> [[VMAX2_I]]
uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) {
  return vmaxq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vmaxq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> [[VMAX_I]], <4 x i32> [[VMAX1_I]]) #4
// CHECK: ret <4 x i32> [[VMAX2_I]]
uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) {
  return vmaxq_u32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vmaxq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> [[VMAX_I]], <4 x float> [[VMAX1_I]]) #4
// CHECK: ret <4 x float> [[VMAX2_I]]
float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) {
  return vmaxq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vmaxq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> [[VMAX_I]], <2 x double> [[VMAX1_I]]) #4
// CHECK: ret <2 x double> [[VMAX2_I]]
float64x2_t test_vmaxq_f64(float64x2_t a, float64x2_t b) {
  return vmaxq_f64(a, b);
}
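
// vmin/vminq mirror the vmax tests, using aarch64.neon.smin/umin/fmin.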
vmaxq_f64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vmin_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VMIN_I]]
int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) {
  return vmin_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmin_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> [[VMIN_I]], <4 x i16> [[VMIN1_I]]) #4
// CHECK: ret <4 x i16> [[VMIN2_I]]
int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) {
  return vmin_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmin_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> [[VMIN_I]], <2 x i32> [[VMIN1_I]]) #4
// CHECK: ret <2 x i32> [[VMIN2_I]]
int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) {
  return vmin_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vmin_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VMIN_I]]
uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) {
  return vmin_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmin_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> [[VMIN_I]], <4 x i16> [[VMIN1_I]]) #4
// CHECK: ret <4 x i16> [[VMIN2_I]]
uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) {
  return vmin_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmin_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> [[VMIN_I]], <2 x i32> [[VMIN1_I]]) #4
// CHECK: ret <2 x i32> [[VMIN2_I]]
uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) {
  return vmin_u32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vmin_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> [[VMIN_I]], <2 x float> [[VMIN1_I]]) #4
// CHECK: ret <2 x float> [[VMIN2_I]]
float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) {
  return vmin_f32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vminq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VMIN_I]]
int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) {
  return vminq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vminq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> [[VMIN_I]], <8 x i16> [[VMIN1_I]]) #4
// CHECK: ret <8 x i16> [[VMIN2_I]]
int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) {
  return vminq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vminq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> [[VMIN_I]], <4 x i32> [[VMIN1_I]]) #4
// CHECK: ret <4 x i32> [[VMIN2_I]]
int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) {
  return vminq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vminq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VMIN_I]]
uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) {
  return vminq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vminq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> [[VMIN_I]], <8 x i16> [[VMIN1_I]]) #4
// CHECK: ret <8 x i16> [[VMIN2_I]]
uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) {
  return vminq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vminq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> [[VMIN_I]], <4 x i32> [[VMIN1_I]]) #4
// CHECK: ret <4 x i32> [[VMIN2_I]]
uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) {
  return vminq_u32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vminq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> [[VMIN_I]], <4 x float> [[VMIN1_I]]) #4
// CHECK: ret <4 x float> [[VMIN2_I]]
float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) {
  return vminq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vminq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> [[VMIN_I]], <2 x double> [[VMIN1_I]]) #4
// CHECK: ret <2 x double> [[VMIN2_I]]
float64x2_t test_vminq_f64(float64x2_t a, float64x2_t b) {
  return vminq_f64(a, b);
}
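
// Illustrative only, no CHECK coverage: every vmin/vminq test above lowers to
// a single lane-wise minimum intrinsic. A scalar model of vmin_s8 under that
// reading (the helper name is ours, not part of arm_neon.h):
static inline void model_vmin_s8(const int8_t a[8], const int8_t b[8],
                                 int8_t out[8]) {
  for (int i = 0; i < 8; ++i)
    out[i] = a[i] < b[i] ? a[i] : b[i]; // lane-wise signed minimum
}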
// CHECK-LABEL: define <2 x float> @test_vmaxnm_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VMAXNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VMAXNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float> [[VMAXNM_I]], <2 x float> [[VMAXNM1_I]]) #4
// CHECK: ret <2 x float> [[VMAXNM2_I]]
float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) {
  return vmaxnm_f32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vmaxnmq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> [[VMAXNM_I]], <4 x float> [[VMAXNM1_I]]) #4
// CHECK: ret <4 x float> [[VMAXNM2_I]]
float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) {
  return vmaxnmq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vmaxnmq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> [[VMAXNM_I]], <2 x double> [[VMAXNM1_I]]) #4
// CHECK: ret <2 x double> [[VMAXNM2_I]]
float64x2_t test_vmaxnmq_f64(float64x2_t a, float64x2_t b) {
  return vmaxnmq_f64(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vminnm_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VMINNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VMINNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float> [[VMINNM_I]], <2 x float> [[VMINNM1_I]]) #4
// CHECK: ret <2 x float> [[VMINNM2_I]]
float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) {
  return vminnm_f32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vminnmq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMINNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> [[VMINNM_I]], <4 x float> [[VMINNM1_I]]) #4
// CHECK: ret <4 x float> [[VMINNM2_I]]
float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) {
  return vminnmq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vminnmq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VMINNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> [[VMINNM_I]], <2 x double> [[VMINNM1_I]]) #4
// CHECK: ret <2 x double> [[VMINNM2_I]]
float64x2_t test_vminnmq_f64(float64x2_t a, float64x2_t b) {
  return vminnmq_f64(a, b);
}
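
// Illustrative only, no CHECK coverage: vmaxnm/vminnm implement IEEE 754-2008
// maxNum/minNum, so when exactly one lane holds a quiet NaN the numeric
// operand is returned; plain vmax/vmin would propagate the NaN instead.
// Scalar sketch of one lane (the helper name is ours):
static inline float model_fmaxnm(float x, float y) {
  if (x != x) return y;  // x is NaN: prefer the number
  if (y != y) return x;  // y is NaN: prefer the number
  return x > y ? x : y;
}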
// CHECK-LABEL: define <8 x i8> @test_vpmax_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VPMAX_I]]
int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) {
  return vpmax_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpmax_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> [[VPMAX_I]], <4 x i16> [[VPMAX1_I]]) #4
// CHECK: ret <4 x i16> [[VPMAX2_I]]
int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) {
  return vpmax_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpmax_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> [[VPMAX_I]], <2 x i32> [[VPMAX1_I]]) #4
// CHECK: ret <2 x i32> [[VPMAX2_I]]
int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) {
  return vpmax_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vpmax_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VPMAX_I]]
uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) {
  return vpmax_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpmax_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> [[VPMAX_I]], <4 x i16> [[VPMAX1_I]]) #4
// CHECK: ret <4 x i16> [[VPMAX2_I]]
uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) {
  return vpmax_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpmax_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> [[VPMAX_I]], <2 x i32> [[VPMAX1_I]]) #4
// CHECK: ret <2 x i32> [[VPMAX2_I]]
uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) {
  return vpmax_u32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vpmax_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VPMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VPMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> [[VPMAX_I]], <2 x float> [[VPMAX1_I]]) #4
// CHECK: ret <2 x float> [[VPMAX2_I]]
float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) {
  return vpmax_f32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vpmaxq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VPMAX_I]]
int8x16_t test_vpmaxq_s8(int8x16_t a, int8x16_t b) {
  return vpmaxq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vpmaxq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> [[VPMAX_I]], <8 x i16> [[VPMAX1_I]]) #4
// CHECK: ret <8 x i16> [[VPMAX2_I]]
int16x8_t test_vpmaxq_s16(int16x8_t a, int16x8_t b) {
  return vpmaxq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vpmaxq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> [[VPMAX_I]], <4 x i32> [[VPMAX1_I]]) #4
// CHECK: ret <4 x i32> [[VPMAX2_I]]
int32x4_t test_vpmaxq_s32(int32x4_t a, int32x4_t b) {
  return vpmaxq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vpmaxq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VPMAX_I]]
uint8x16_t test_vpmaxq_u8(uint8x16_t a, uint8x16_t b) {
  return vpmaxq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vpmaxq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> [[VPMAX_I]], <8 x i16> [[VPMAX1_I]]) #4
// CHECK: ret <8 x i16> [[VPMAX2_I]]
uint16x8_t test_vpmaxq_u16(uint16x8_t a, uint16x8_t b) {
  return vpmaxq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vpmaxq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> [[VPMAX_I]], <4 x i32> [[VPMAX1_I]]) #4
// CHECK: ret <4 x i32> [[VPMAX2_I]]
uint32x4_t test_vpmaxq_u32(uint32x4_t a, uint32x4_t b) {
  return vpmaxq_u32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vpmaxq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VPMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> [[VPMAX_I]], <4 x float> [[VPMAX1_I]]) #4
// CHECK: ret <4 x float> [[VPMAX2_I]]
float32x4_t test_vpmaxq_f32(float32x4_t a, float32x4_t b) {
  return vpmaxq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vpmaxq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VPMAX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VPMAX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VPMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> [[VPMAX_I]], <2 x double> [[VPMAX1_I]]) #4
// CHECK: ret <2 x double> [[VPMAX2_I]]
float64x2_t test_vpmaxq_f64(float64x2_t a, float64x2_t b) {
  return vpmaxq_f64(a, b);
}
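
// Illustrative only, no CHECK coverage: the pairwise (*p) variants reduce
// adjacent lanes of the concatenation {a, b}, so lane i of vpmax_s8(a, b) is
// max(a[2i], a[2i+1]) for i < 4, then the b pairs fill lanes 4..7. Scalar
// model (the helper name is ours):
static inline void model_vpmax_s8(const int8_t a[8], const int8_t b[8],
                                  int8_t out[8]) {
  for (int i = 0; i < 4; ++i) {
    out[i]     = a[2 * i] > a[2 * i + 1] ? a[2 * i] : a[2 * i + 1];
    out[i + 4] = b[2 * i] > b[2 * i + 1] ? b[2 * i] : b[2 * i + 1];
  }
}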
// CHECK-LABEL: define <8 x i8> @test_vpmin_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VPMIN_I]]
int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) {
  return vpmin_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpmin_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> [[VPMIN_I]], <4 x i16> [[VPMIN1_I]]) #4
// CHECK: ret <4 x i16> [[VPMIN2_I]]
int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) {
  return vpmin_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpmin_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> [[VPMIN_I]], <2 x i32> [[VPMIN1_I]]) #4
// CHECK: ret <2 x i32> [[VPMIN2_I]]
int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) {
  return vpmin_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vpmin_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VPMIN_I]]
uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) {
  return vpmin_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpmin_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> [[VPMIN_I]], <4 x i16> [[VPMIN1_I]]) #4
// CHECK: ret <4 x i16> [[VPMIN2_I]]
uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) {
  return vpmin_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpmin_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> [[VPMIN_I]], <2 x i32> [[VPMIN1_I]]) #4
// CHECK: ret <2 x i32> [[VPMIN2_I]]
uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) {
  return vpmin_u32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vpmin_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VPMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> [[VPMIN_I]], <2 x float> [[VPMIN1_I]]) #4
// CHECK: ret <2 x float> [[VPMIN2_I]]
float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) {
  return vpmin_f32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vpminq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VPMIN_I]]
int8x16_t test_vpminq_s8(int8x16_t a, int8x16_t b) {
  return vpminq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vpminq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> [[VPMIN_I]], <8 x i16> [[VPMIN1_I]]) #4
// CHECK: ret <8 x i16> [[VPMIN2_I]]
int16x8_t test_vpminq_s16(int16x8_t a, int16x8_t b) {
  return vpminq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vpminq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> [[VPMIN_I]], <4 x i32> [[VPMIN1_I]]) #4
// CHECK: ret <4 x i32> [[VPMIN2_I]]
int32x4_t test_vpminq_s32(int32x4_t a, int32x4_t b) {
  return vpminq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vpminq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VPMIN_I]]
uint8x16_t test_vpminq_u8(uint8x16_t a, uint8x16_t b) {
  return vpminq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vpminq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> [[VPMIN_I]], <8 x i16> [[VPMIN1_I]]) #4
// CHECK: ret <8 x i16> [[VPMIN2_I]]
uint16x8_t test_vpminq_u16(uint16x8_t a, uint16x8_t b) {
  return vpminq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vpminq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> [[VPMIN_I]], <4 x i32> [[VPMIN1_I]]) #4
// CHECK: ret <4 x i32> [[VPMIN2_I]]
uint32x4_t test_vpminq_u32(uint32x4_t a, uint32x4_t b) {
  return vpminq_u32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vpminq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VPMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> [[VPMIN_I]], <4 x float> [[VPMIN1_I]]) #4
// CHECK: ret <4 x float> [[VPMIN2_I]]
float32x4_t test_vpminq_f32(float32x4_t a, float32x4_t b) {
  return vpminq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vpminq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VPMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> [[VPMIN_I]], <2 x double> [[VPMIN1_I]]) #4
// CHECK: ret <2 x double> [[VPMIN2_I]]
float64x2_t test_vpminq_f64(float64x2_t a, float64x2_t b) {
  return vpminq_f64(a, b);
}
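
// Illustrative only, no CHECK coverage: vpmin mirrors vpmax with minimum as
// the pairwise reduction. Worked example with made-up values: for
// a = {5,1,7,3,9,2,8,4} and b = {6,0,6,6,1,1,2,3},
// vpmin_s8(a, b) == {1,3,2,4,0,6,1,2}.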
// CHECK-LABEL: define <2 x float> @test_vpmaxnm_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPMAXNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VPMAXNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VPMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> [[VPMAXNM_I]], <2 x float> [[VPMAXNM1_I]]) #4
// CHECK: ret <2 x float> [[VPMAXNM2_I]]
float32x2_t test_vpmaxnm_f32(float32x2_t a, float32x2_t b) {
  return vpmaxnm_f32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vpmaxnmq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VPMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VPMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VPMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> [[VPMAXNM_I]], <4 x float> [[VPMAXNM1_I]]) #4
// CHECK: ret <4 x float> [[VPMAXNM2_I]]
float32x4_t test_vpmaxnmq_f32(float32x4_t a, float32x4_t b) {
  return vpmaxnmq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vpmaxnmq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VPMAXNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VPMAXNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VPMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> [[VPMAXNM_I]], <2 x double> [[VPMAXNM1_I]]) #4
// CHECK: ret <2 x double> [[VPMAXNM2_I]]
float64x2_t test_vpmaxnmq_f64(float64x2_t a, float64x2_t b) {
  return vpmaxnmq_f64(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vpminnm_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPMINNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VPMINNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VPMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> [[VPMINNM_I]], <2 x float> [[VPMINNM1_I]]) #4
// CHECK: ret <2 x float> [[VPMINNM2_I]]
float32x2_t test_vpminnm_f32(float32x2_t a, float32x2_t b) {
  return vpminnm_f32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vpminnmq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VPMINNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VPMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VPMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> [[VPMINNM_I]], <4 x float> [[VPMINNM1_I]]) #4
// CHECK: ret <4 x float> [[VPMINNM2_I]]
float32x4_t test_vpminnmq_f32(float32x4_t a, float32x4_t b) {
  return vpminnmq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vpminnmq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VPMINNM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VPMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VPMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> [[VPMINNM_I]], <2 x double> [[VPMINNM1_I]]) #4
// CHECK: ret <2 x double> [[VPMINNM2_I]]
float64x2_t test_vpminnmq_f64(float64x2_t a, float64x2_t b) {
  return vpminnmq_f64(a, b);
}
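
// Illustrative only, no CHECK coverage: vpmaxnm/vpminnm apply the
// NaN-ignoring maxNum/minNum operation pairwise, e.g. for 2-lane inputs
// vpmaxnm_f32(a, b) == { maxNum(a0, a1), maxNum(b0, b1) }. Scalar model
// reusing model_fmaxnm from above (names are ours):
static inline void model_vpmaxnm_f32(const float a[2], const float b[2],
                                     float out[2]) {
  out[0] = model_fmaxnm(a[0], a[1]); // pairwise over a
  out[1] = model_fmaxnm(b[0], b[1]); // pairwise over b
}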
// CHECK-LABEL: define <8 x i8> @test_vpadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VPADD_V_I]]
int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) {
  return vpadd_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> [[VPADD_V_I]], <4 x i16> [[VPADD_V1_I]]) #4
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) {
  return vpadd_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> [[VPADD_V_I]], <2 x i32> [[VPADD_V1_I]]) #4
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) {
  return vpadd_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vpadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VPADD_V_I]]
uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) {
  return vpadd_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> [[VPADD_V_I]], <4 x i16> [[VPADD_V1_I]]) #4
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) {
  return vpadd_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> [[VPADD_V_I]], <2 x i32> [[VPADD_V1_I]]) #4
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) {
  return vpadd_u32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vpadd_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> [[VPADD_V_I]], <2 x float> [[VPADD_V1_I]]) #4
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x float>
// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
  return vpadd_f32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vpaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VPADDQ_V_I]]
int8x16_t test_vpaddq_s8(int8x16_t a, int8x16_t b) {
  return vpaddq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vpaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> [[VPADDQ_V_I]], <8 x i16> [[VPADDQ_V1_I]]) #4
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vpaddq_s16(int16x8_t a, int16x8_t b) {
  return vpaddq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vpaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> [[VPADDQ_V_I]], <4 x i32> [[VPADDQ_V1_I]]) #4
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vpaddq_s32(int32x4_t a, int32x4_t b) {
  return vpaddq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vpaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VPADDQ_V_I]]
uint8x16_t test_vpaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vpaddq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vpaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> [[VPADDQ_V_I]], <8 x i16> [[VPADDQ_V1_I]]) #4
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vpaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vpaddq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vpaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> [[VPADDQ_V_I]], <4 x i32> [[VPADDQ_V1_I]]) #4
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vpaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vpaddq_u32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vpaddq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> [[VPADDQ_V_I]], <4 x float> [[VPADDQ_V1_I]]) #4
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x float> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <4 x float>
// CHECK: ret <4 x float> [[TMP2]]
float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) {
  return vpaddq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vpaddq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> [[VPADDQ_V_I]], <2 x double> [[VPADDQ_V1_I]]) #4
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x double> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <2 x double>
// CHECK: ret <2 x double> [[TMP2]]
float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) {
  return vpaddq_f64(a, b);
}
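
// Illustrative only, no CHECK coverage: vpadd/vpaddq sum adjacent pairs of
// the concatenation {a, b}; with a = {1,2,3,4} and b = {10,20,30,40},
// vpadd_s16(a, b) yields {3,7,30,70}. Scalar model (helper name is ours):
static inline void model_vpadd_s16(const int16_t a[4], const int16_t b[4],
                                   int16_t out[4]) {
  out[0] = (int16_t)(a[0] + a[1]); // pairs from a fill the low half
  out[1] = (int16_t)(a[2] + a[3]);
  out[2] = (int16_t)(b[0] + b[1]); // pairs from b fill the high half
  out[3] = (int16_t)(b[2] + b[3]);
}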
// CHECK-LABEL: define <4 x i16> @test_vqdmulh_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #4
// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) {
  return vqdmulh_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqdmulh_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #4
// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) {
  return vqdmulh_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #4
// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) {
  return vqdmulhq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #4
// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) {
  return vqdmulhq_s32(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #4
// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) {
  return vqrdmulh_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #4
// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) {
  return vqrdmulh_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #4
// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) {
  return vqrdmulhq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #4
// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) {
  return vqrdmulhq_s32(a, b);
}
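
// Illustrative only, no CHECK coverage: vqdmulh keeps the high half of the
// saturating doubled product, and vqrdmulh additionally rounds it; the only
// input that saturates is INT16_MIN * INT16_MIN. One-lane scalar model
// (name is ours; assumes arithmetic right shift of negative values):
static inline int16_t model_vqrdmulh_lane_s16(int16_t a, int16_t b,
                                              int rounding) {
  if (a == INT16_MIN && b == INT16_MIN)
    return INT16_MAX;                      // the single saturating case
  int32_t p = 2 * (int32_t)a * (int32_t)b; // doubled product fits in 32 bits
  if (rounding)
    p += 1 << 15;                          // vqrdmulh rounds to nearest
  return (int16_t)(p >> 16);               // keep the high half
}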
// CHECK-LABEL: define <2 x float> @test_vmulx_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[VMULX_I]], <2 x float> [[VMULX1_I]]) #4
// CHECK: ret <2 x float> [[VMULX2_I]]
float32x2_t test_vmulx_f32(float32x2_t a, float32x2_t b) {
  return vmulx_f32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vmulxq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[VMULX_I]], <4 x float> [[VMULX1_I]]) #4
// CHECK: ret <4 x float> [[VMULX2_I]]
float32x4_t test_vmulxq_f32(float32x4_t a, float32x4_t b) {
  return vmulxq_f32(a, b);
}

// CHECK-LABEL: define <2 x double> @test_vmulxq_f64(<2 x double> %a, <2 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VMULX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VMULX1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[VMULX_I]], <2 x double> [[VMULX1_I]]) #4
// CHECK: ret <2 x double> [[VMULX2_I]]
float64x2_t test_vmulxq_f64(float64x2_t a, float64x2_t b) {
  return vmulxq_f64(a, b);
}
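
// Illustrative only, no CHECK coverage: vmulx lowers to FMULX, which behaves
// like an ordinary multiply except that (+/-0) * (+/-Inf) returns +/-2.0
// (the sign is the XOR of the operand signs) rather than NaN. Scalar sketch
// (name is ours; uses isinf/signbit from <math.h>):
#include <math.h>
static inline float model_fmulx(float x, float y) {
  if ((x == 0.0f && isinf(y)) || (isinf(x) && y == 0.0f))
    return signbit(x) != signbit(y) ? -2.0f : 2.0f; // special 0 * Inf case
  return x * y;
}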
// CHECK-LABEL: define <8 x i8> @test_vshl_n_s8(<8 x i8> %a) #0 {
// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK: ret <8 x i8> [[VSHL_N]]
int8x8_t test_vshl_n_s8(int8x8_t a) {
  return vshl_n_s8(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vshl_n_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
// CHECK: ret <4 x i16> [[VSHL_N]]
int16x4_t test_vshl_n_s16(int16x4_t a) {
  return vshl_n_s16(a, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vshl_n_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 3, i32 3>
// CHECK: ret <2 x i32> [[VSHL_N]]
int32x2_t test_vshl_n_s32(int32x2_t a) {
  return vshl_n_s32(a, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vshlq_n_s8(<16 x i8> %a) #0 {
// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK: ret <16 x i8> [[VSHL_N]]
int8x16_t test_vshlq_n_s8(int8x16_t a) {
  return vshlq_n_s8(a, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vshlq_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK: ret <8 x i16> [[VSHL_N]]
int16x8_t test_vshlq_n_s16(int16x8_t a) {
  return vshlq_n_s16(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vshlq_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <4 x i32> [[VSHL_N]]
int32x4_t test_vshlq_n_s32(int32x4_t a) {
  return vshlq_n_s32(a, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vshlq_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 3, i64 3>
// CHECK: ret <2 x i64> [[VSHL_N]]
int64x2_t test_vshlq_n_s64(int64x2_t a) {
  return vshlq_n_s64(a, 3);
}

// CHECK-LABEL: define <8 x i8> @test_vshl_n_u8(<8 x i8> %a) #0 {
// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK: ret <8 x i8> [[VSHL_N]]
uint8x8_t test_vshl_n_u8(uint8x8_t a) {
  return vshl_n_u8(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vshl_n_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
// CHECK: ret <4 x i16> [[VSHL_N]]
uint16x4_t test_vshl_n_u16(uint16x4_t a) {
  return vshl_n_u16(a, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vshl_n_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 3, i32 3>
// CHECK: ret <2 x i32> [[VSHL_N]]
uint32x2_t test_vshl_n_u32(uint32x2_t a) {
  return vshl_n_u32(a, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vshlq_n_u8(<16 x i8> %a) #0 {
// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK: ret <16 x i8> [[VSHL_N]]
uint8x16_t test_vshlq_n_u8(uint8x16_t a) {
  return vshlq_n_u8(a, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vshlq_n_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK: ret <8 x i16> [[VSHL_N]]
uint16x8_t test_vshlq_n_u16(uint16x8_t a) {
  return vshlq_n_u16(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vshlq_n_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <4 x i32> [[VSHL_N]]
uint32x4_t test_vshlq_n_u32(uint32x4_t a) {
  return vshlq_n_u32(a, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vshlq_n_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 3, i64 3>
// CHECK: ret <2 x i64> [[VSHL_N]]
uint64x2_t test_vshlq_n_u64(uint64x2_t a) {
  return vshlq_n_u64(a, 3);
}
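
// Illustrative only, no CHECK coverage: the immediate-shift forms need no
// target intrinsic; vshl_n lowers to a plain IR 'shl' by a splatted constant,
// so each lane of vshl_n_s16(a, 3) is simply a[i] << 3, i.e. a[i] * 8.
// Minimal usage sketch (wrapper name is ours):
static inline int16x4_t example_vshl_by_3(int16x4_t a) {
  return vshl_n_s16(a, 3); // lane-wise a << 3
}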
// CHECK-LABEL: define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) #0 {
// CHECK: [[VSHR_N:%.*]] = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK: ret <8 x i8> [[VSHR_N]]
int8x8_t test_vshr_n_s8(int8x8_t a) {
  return vshr_n_s8(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
// CHECK: ret <4 x i16> [[VSHR_N]]
int16x4_t test_vshr_n_s16(int16x4_t a) {
  return vshr_n_s16(a, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], <i32 3, i32 3>
// CHECK: ret <2 x i32> [[VSHR_N]]
int32x2_t test_vshr_n_s32(int32x2_t a) {
  return vshr_n_s32(a, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) #0 {
// CHECK: [[VSHR_N:%.*]] = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK: ret <16 x i8> [[VSHR_N]]
int8x16_t test_vshrq_n_s8(int8x16_t a) {
  return vshrq_n_s8(a, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK: ret <8 x i16> [[VSHR_N]]
int16x8_t test_vshrq_n_s16(int16x8_t a) {
  return vshrq_n_s16(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <4 x i32> [[VSHR_N]]
int32x4_t test_vshrq_n_s32(int32x4_t a) {
  return vshrq_n_s32(a, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], <i64 3, i64 3>
// CHECK: ret <2 x i64> [[VSHR_N]]
int64x2_t test_vshrq_n_s64(int64x2_t a) {
  return vshrq_n_s64(a, 3);
}

// CHECK-LABEL: define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) #0 {
// CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK: ret <8 x i8> [[VSHR_N]]
uint8x8_t test_vshr_n_u8(uint8x8_t a) {
  return vshr_n_u8(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
// CHECK: ret <4 x i16> [[VSHR_N]]
uint16x4_t test_vshr_n_u16(uint16x4_t a) {
  return vshr_n_u16(a, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], <i32 3, i32 3>
// CHECK: ret <2 x i32> [[VSHR_N]]
uint32x2_t test_vshr_n_u32(uint32x2_t a) {
  return vshr_n_u32(a, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) #0 {
// CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK: ret <16 x i8> [[VSHR_N]]
uint8x16_t test_vshrq_n_u8(uint8x16_t a) {
  return vshrq_n_u8(a, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK: ret <8 x i16> [[VSHR_N]]
uint16x8_t test_vshrq_n_u16(uint16x8_t a) {
  return vshrq_n_u16(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <4 x i32> [[VSHR_N]]
uint32x4_t test_vshrq_n_u32(uint32x4_t a) {
  return vshrq_n_u32(a, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], <i64 3, i64 3>
// CHECK: ret <2 x i64> [[VSHR_N]]
uint64x2_t test_vshrq_n_u64(uint64x2_t a) {
  return vshrq_n_u64(a, 3);
}
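
// Illustrative only, no CHECK coverage: vshr_n picks the IR shift from the
// signedness -- 'ashr' for the _s variants, 'lshr' for the _u variants --
// and the vsra_n tests below fuse that shift with an accumulate,
// a + (b >> n). One-lane scalar model of vsra_n_s8 (name is ours; assumes
// arithmetic right shift of negative values):
static inline int8_t model_vsra_lane_s8(int8_t acc, int8_t v, int n) {
  return (int8_t)(acc + (v >> n)); // shift right, then accumulate
}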
5433 5434 // CHECK-LABEL: define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) #0 { 5435 // CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> 5436 // CHECK: ret <8 x i8> [[VSHR_N]] 5437 int8x8_t test_vshr_n_u8(int8x8_t a) { 5438 return vshr_n_u8(a, 3); 5439 } 5440 5441 // CHECK-LABEL: define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) #0 { 5442 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 5443 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5444 // CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3> 5445 // CHECK: ret <4 x i16> [[VSHR_N]] 5446 int16x4_t test_vshr_n_u16(int16x4_t a) { 5447 return vshr_n_u16(a, 3); 5448 } 5449 5450 // CHECK-LABEL: define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) #0 { 5451 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 5452 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5453 // CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], <i32 3, i32 3> 5454 // CHECK: ret <2 x i32> [[VSHR_N]] 5455 int32x2_t test_vshr_n_u32(int32x2_t a) { 5456 return vshr_n_u32(a, 3); 5457 } 5458 5459 // CHECK-LABEL: define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) #0 { 5460 // CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> 5461 // CHECK: ret <16 x i8> [[VSHR_N]] 5462 int8x16_t test_vshrq_n_u8(int8x16_t a) { 5463 return vshrq_n_u8(a, 3); 5464 } 5465 5466 // CHECK-LABEL: define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) #0 { 5467 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5468 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5469 // CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> 5470 // CHECK: ret <8 x i16> [[VSHR_N]] 5471 int16x8_t test_vshrq_n_u16(int16x8_t a) { 5472 return vshrq_n_u16(a, 3); 5473 } 5474 5475 // CHECK-LABEL: define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) #0 { 5476 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5477 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5478 // CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3> 5479 // CHECK: ret <4 x i32> [[VSHR_N]] 5480 int32x4_t test_vshrq_n_u32(int32x4_t a) { 5481 return vshrq_n_u32(a, 3); 5482 } 5483 5484 // CHECK-LABEL: define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) #0 { 5485 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 5486 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5487 // CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], <i64 3, i64 3> 5488 // CHECK: ret <2 x i64> [[VSHR_N]] 5489 int64x2_t test_vshrq_n_u64(int64x2_t a) { 5490 return vshrq_n_u64(a, 3); 5491 } 5492 5493 // CHECK-LABEL: define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) #0 { 5494 // CHECK: [[VSRA_N:%.*]] = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> 5495 // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]] 5496 // CHECK: ret <8 x i8> [[TMP0]] 5497 int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) { 5498 return vsra_n_s8(a, b, 3); 5499 } 5500 5501 // CHECK-LABEL: define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) #0 { 5502 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 5503 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 5504 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5505 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 5506 // CHECK: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], <i16 
// CHECK-LABEL: define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSRA_N:%.*]] = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) {
  return vsra_n_s8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3>
// CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
// CHECK: ret <4 x i16> [[TMP4]]
int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) {
  return vsra_n_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], <i32 3, i32 3>
// CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
// CHECK: ret <2 x i32> [[TMP4]]
int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) {
  return vsra_n_s32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSRA_N:%.*]] = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) {
  return vsraq_n_s8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
// CHECK: ret <8 x i16> [[TMP4]]
int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) {
  return vsraq_n_s16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
// CHECK: ret <4 x i32> [[TMP4]]
int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) {
  return vsraq_n_s32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], <i64 3, i64 3>
// CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <2 x i64> [[TMP4]]
int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) {
  return vsraq_n_s64(a, b, 3);
}
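
// The unsigned vsra_n/vsraq_n forms accumulate a logical shift instead:
// a[i] + (b[i] >>u n), i.e. lshr followed by add.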
// CHECK-LABEL: define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSRA_N:%.*]] = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vsra_n_u8(int8x8_t a, int8x8_t b) {
  return vsra_n_u8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3>
// CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
// CHECK: ret <4 x i16> [[TMP4]]
int16x4_t test_vsra_n_u16(int16x4_t a, int16x4_t b) {
  return vsra_n_u16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], <i32 3, i32 3>
// CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
// CHECK: ret <2 x i32> [[TMP4]]
int32x2_t test_vsra_n_u32(int32x2_t a, int32x2_t b) {
  return vsra_n_u32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSRA_N:%.*]] = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vsraq_n_u8(int8x16_t a, int8x16_t b) {
  return vsraq_n_u8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
// CHECK: ret <8 x i16> [[TMP4]]
int16x8_t test_vsraq_n_u16(int16x8_t a, int16x8_t b) {
  return vsraq_n_u16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
// CHECK: ret <4 x i32> [[TMP4]]
int32x4_t test_vsraq_n_u32(int32x4_t a, int32x4_t b) {
  return vsraq_n_u32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], <i64 3, i64 3>
// CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <2 x i64> [[TMP4]]
int64x2_t test_vsraq_n_u64(int64x2_t a, int64x2_t b) {
  return vsraq_n_u64(a, b, 3);
}
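
// vrshr_n/vrshrq_n: rounding shift right, conceptually (a[i] + (1 << (n-1))) >> n,
// rounding to nearest. It lowers to the rounding-shift-left intrinsic (srshl)
// with a negated shift amount, hence the splats of -3 below.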
// CHECK-LABEL: define <8 x i8> @test_vrshr_n_s8(<8 x i8> %a) #0 {
// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK: ret <8 x i8> [[VRSHR_N]]
int8x8_t test_vrshr_n_s8(int8x8_t a) {
  return vrshr_n_s8(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vrshr_n_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK: ret <4 x i16> [[VRSHR_N1]]
int16x4_t test_vrshr_n_s16(int16x4_t a) {
  return vrshr_n_s16(a, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vrshr_n_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
// CHECK: ret <2 x i32> [[VRSHR_N1]]
int32x2_t test_vrshr_n_s32(int32x2_t a) {
  return vrshr_n_s32(a, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vrshrq_n_s8(<16 x i8> %a) #0 {
// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK: ret <16 x i8> [[VRSHR_N]]
int8x16_t test_vrshrq_n_s8(int8x16_t a) {
  return vrshrq_n_s8(a, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vrshrq_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK: ret <8 x i16> [[VRSHR_N1]]
int16x8_t test_vrshrq_n_s16(int16x8_t a) {
  return vrshrq_n_s16(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vrshrq_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
// CHECK: ret <4 x i32> [[VRSHR_N1]]
int32x4_t test_vrshrq_n_s32(int32x4_t a) {
  return vrshrq_n_s32(a, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vrshrq_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
// CHECK: ret <2 x i64> [[VRSHR_N1]]
int64x2_t test_vrshrq_n_s64(int64x2_t a) {
  return vrshrq_n_s64(a, 3);
}
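
// Unsigned rounding shift right: same idea, but via urshl with a negative
// shift amount.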
// CHECK-LABEL: define <8 x i8> @test_vrshr_n_u8(<8 x i8> %a) #0 {
// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK: ret <8 x i8> [[VRSHR_N]]
int8x8_t test_vrshr_n_u8(int8x8_t a) {
  return vrshr_n_u8(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vrshr_n_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK: ret <4 x i16> [[VRSHR_N1]]
int16x4_t test_vrshr_n_u16(int16x4_t a) {
  return vrshr_n_u16(a, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vrshr_n_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
// CHECK: ret <2 x i32> [[VRSHR_N1]]
int32x2_t test_vrshr_n_u32(int32x2_t a) {
  return vrshr_n_u32(a, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vrshrq_n_u8(<16 x i8> %a) #0 {
// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK: ret <16 x i8> [[VRSHR_N]]
int8x16_t test_vrshrq_n_u8(int8x16_t a) {
  return vrshrq_n_u8(a, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vrshrq_n_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK: ret <8 x i16> [[VRSHR_N1]]
int16x8_t test_vrshrq_n_u16(int16x8_t a) {
  return vrshrq_n_u16(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vrshrq_n_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
// CHECK: ret <4 x i32> [[VRSHR_N1]]
int32x4_t test_vrshrq_n_u32(int32x4_t a) {
  return vrshrq_n_u32(a, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vrshrq_n_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
// CHECK: ret <2 x i64> [[VRSHR_N1]]
int64x2_t test_vrshrq_n_u64(int64x2_t a) {
  return vrshrq_n_u64(a, 3);
}
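
// vrsra_n/vrsraq_n: rounding shift right and accumulate; the srshl-by--n
// result is added into the first operand, a[i] + round(b[i] >> n).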
// CHECK-LABEL: define <8 x i8> @test_vrsra_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) {
  return vrsra_n_s8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vrsra_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
// CHECK: ret <4 x i16> [[TMP3]]
int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) {
  return vrsra_n_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vrsra_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
// CHECK: ret <2 x i32> [[TMP3]]
int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) {
  return vrsra_n_s32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vrsraq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) {
  return vrsraq_n_s8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vrsraq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
// CHECK: ret <8 x i16> [[TMP3]]
int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) {
  return vrsraq_n_s16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vrsraq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
// CHECK: ret <4 x i32> [[TMP3]]
int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) {
  return vrsraq_n_s32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vrsraq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
// CHECK: ret <2 x i64> [[TMP3]]
int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) {
  return vrsraq_n_s64(a, b, 3);
}
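
// Unsigned rounding shift-right-accumulate, built on urshl plus add.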
// CHECK-LABEL: define <8 x i8> @test_vrsra_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vrsra_n_u8(int8x8_t a, int8x8_t b) {
  return vrsra_n_u8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vrsra_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
// CHECK: ret <4 x i16> [[TMP3]]
int16x4_t test_vrsra_n_u16(int16x4_t a, int16x4_t b) {
  return vrsra_n_u16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vrsra_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
// CHECK: ret <2 x i32> [[TMP3]]
int32x2_t test_vrsra_n_u32(int32x2_t a, int32x2_t b) {
  return vrsra_n_u32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vrsraq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vrsraq_n_u8(int8x16_t a, int8x16_t b) {
  return vrsraq_n_u8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vrsraq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
// CHECK: ret <8 x i16> [[TMP3]]
int16x8_t test_vrsraq_n_u16(int16x8_t a, int16x8_t b) {
  return vrsraq_n_u16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vrsraq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
// CHECK: ret <4 x i32> [[TMP3]]
int32x4_t test_vrsraq_n_u32(int32x4_t a, int32x4_t b) {
  return vrsraq_n_u32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vrsraq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
// CHECK: ret <2 x i64> [[TMP3]]
int64x2_t test_vrsraq_n_u64(int64x2_t a, int64x2_t b) {
  return vrsraq_n_u64(a, b, 3);
}
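
// vsri_n/vsriq_n: shift right and insert. Each lane of b is shifted right by
// n and written into a, leaving the top n bits of a's lane unchanged. This
// maps directly onto the aarch64.neon.vsri intrinsic, which takes the shift
// amount as an i32 immediate; polynomial variants appear at the end of the
// group.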
// CHECK-LABEL: define <8 x i8> @test_vsri_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK: ret <8 x i8> [[VSRI_N]]
int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) {
  return vsri_n_s8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vsri_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
// CHECK: ret <4 x i16> [[VSRI_N2]]
int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) {
  return vsri_n_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vsri_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
// CHECK: ret <2 x i32> [[VSRI_N2]]
int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) {
  return vsri_n_s32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsriq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK: ret <16 x i8> [[VSRI_N]]
int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) {
  return vsriq_n_s8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vsriq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
// CHECK: ret <8 x i16> [[VSRI_N2]]
int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) {
  return vsriq_n_s16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vsriq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
// CHECK: ret <4 x i32> [[VSRI_N2]]
int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) {
  return vsriq_n_s32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vsriq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
// CHECK: ret <2 x i64> [[VSRI_N2]]
int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) {
  return vsriq_n_s64(a, b, 3);
}

// CHECK-LABEL: define <8 x i8> @test_vsri_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK: ret <8 x i8> [[VSRI_N]]
int8x8_t test_vsri_n_u8(int8x8_t a, int8x8_t b) {
  return vsri_n_u8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vsri_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
// CHECK: ret <4 x i16> [[VSRI_N2]]
int16x4_t test_vsri_n_u16(int16x4_t a, int16x4_t b) {
  return vsri_n_u16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vsri_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
// CHECK: ret <2 x i32> [[VSRI_N2]]
int32x2_t test_vsri_n_u32(int32x2_t a, int32x2_t b) {
  return vsri_n_u32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsriq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK: ret <16 x i8> [[VSRI_N]]
int8x16_t test_vsriq_n_u8(int8x16_t a, int8x16_t b) {
  return vsriq_n_u8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vsriq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
// CHECK: ret <8 x i16> [[VSRI_N2]]
int16x8_t test_vsriq_n_u16(int16x8_t a, int16x8_t b) {
  return vsriq_n_u16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vsriq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
// CHECK: ret <4 x i32> [[VSRI_N2]]
int32x4_t test_vsriq_n_u32(int32x4_t a, int32x4_t b) {
  return vsriq_n_u32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vsriq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
// CHECK: ret <2 x i64> [[VSRI_N2]]
int64x2_t test_vsriq_n_u64(int64x2_t a, int64x2_t b) {
  return vsriq_n_u64(a, b, 3);
}

// CHECK-LABEL: define <8 x i8> @test_vsri_n_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK: ret <8 x i8> [[VSRI_N]]
poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsri_n_p8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vsri_n_p16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 15)
// CHECK: ret <4 x i16> [[VSRI_N2]]
poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsri_n_p16(a, b, 15);
}

// CHECK-LABEL: define <16 x i8> @test_vsriq_n_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK: ret <16 x i8> [[VSRI_N]]
poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) {
  return vsriq_n_p8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vsriq_n_p16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 15)
// CHECK: ret <8 x i16> [[VSRI_N2]]
poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsriq_n_p16(a, b, 15);
}
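
// vsli_n/vsliq_n: shift left and insert, the mirror image of vsri; b << n is
// inserted into a, keeping the low n bits of each lane of a.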
// CHECK-LABEL: define <8 x i8> @test_vsli_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK: ret <8 x i8> [[VSLI_N]]
int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) {
  return vsli_n_s8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vsli_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
// CHECK: ret <4 x i16> [[VSLI_N2]]
int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) {
  return vsli_n_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vsli_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
// CHECK: ret <2 x i32> [[VSLI_N2]]
int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) {
  return vsli_n_s32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsliq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK: ret <16 x i8> [[VSLI_N]]
int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) {
  return vsliq_n_s8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vsliq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
// CHECK: ret <8 x i16> [[VSLI_N2]]
int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) {
  return vsliq_n_s16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vsliq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
// CHECK: ret <4 x i32> [[VSLI_N2]]
int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) {
  return vsliq_n_s32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vsliq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
// CHECK: ret <2 x i64> [[VSLI_N2]]
int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) {
  return vsliq_n_s64(a, b, 3);
}

// CHECK-LABEL: define <8 x i8> @test_vsli_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK: ret <8 x i8> [[VSLI_N]]
uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsli_n_u8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vsli_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
// CHECK: ret <4 x i16> [[VSLI_N2]]
uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsli_n_u16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vsli_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
// CHECK: ret <2 x i32> [[VSLI_N2]]
uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsli_n_u32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsliq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK: ret <16 x i8> [[VSLI_N]]
uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsliq_n_u8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vsliq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
// CHECK: ret <8 x i16> [[VSLI_N2]]
uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsliq_n_u16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vsliq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
// CHECK: ret <4 x i32> [[VSLI_N2]]
uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsliq_n_u32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vsliq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
// CHECK: ret <2 x i64> [[VSLI_N2]]
uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsliq_n_u64(a, b, 3);
}

// CHECK-LABEL: define <8 x i8> @test_vsli_n_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
// CHECK: ret <8 x i8> [[VSLI_N]]
poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsli_n_p8(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vsli_n_p16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 15)
// CHECK: ret <4 x i16> [[VSLI_N2]]
poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsli_n_p16(a, b, 15);
}

// CHECK-LABEL: define <16 x i8> @test_vsliq_n_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
// CHECK: ret <16 x i8> [[VSLI_N]]
poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) {
  return vsliq_n_p8(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vsliq_n_p16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 15)
// CHECK: ret <8 x i16> [[VSLI_N2]]
poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsliq_n_p16(a, b, 15);
}
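
// vqshlu_n/vqshluq_n: signed saturating shift left with an unsigned result
// (sqshlu); negative inputs saturate to 0 and overflow saturates to the
// unsigned lane maximum.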
// CHECK-LABEL: define <8 x i8> @test_vqshlu_n_s8(<8 x i8> %a) #0 {
// CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
// CHECK: ret <8 x i8> [[VQSHLU_N]]
int8x8_t test_vqshlu_n_s8(int8x8_t a) {
  return vqshlu_n_s8(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vqshlu_n_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
// CHECK: ret <4 x i16> [[VQSHLU_N1]]
int16x4_t test_vqshlu_n_s16(int16x4_t a) {
  return vqshlu_n_s16(a, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vqshlu_n_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> <i32 3, i32 3>)
// CHECK: ret <2 x i32> [[VQSHLU_N1]]
int32x2_t test_vqshlu_n_s32(int32x2_t a) {
  return vqshlu_n_s32(a, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vqshluq_n_s8(<16 x i8> %a) #0 {
// CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
// CHECK: ret <16 x i8> [[VQSHLU_N]]
int8x16_t test_vqshluq_n_s8(int8x16_t a) {
  return vqshluq_n_s8(a, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vqshluq_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
// CHECK: ret <8 x i16> [[VQSHLU_N1]]
int16x8_t test_vqshluq_n_s16(int16x8_t a) {
  return vqshluq_n_s16(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vqshluq_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
// CHECK: ret <4 x i32> [[VQSHLU_N1]]
int32x4_t test_vqshluq_n_s32(int32x4_t a) {
  return vqshluq_n_s32(a, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vqshluq_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> <i64 3, i64 3>)
// CHECK: ret <2 x i64> [[VQSHLU_N1]]
int64x2_t test_vqshluq_n_s64(int64x2_t a) {
  return vqshluq_n_s64(a, 3);
}
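
// vshrn_n: shift right and narrow; the source is shifted by n, then each lane
// is truncated to half its width. The tests use shifts of 3, 9 and 19 to
// exercise the 16-, 32- and 64-bit source widths.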
// CHECK-LABEL: define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK: ret <8 x i8> [[VSHRN_N]]
int8x8_t test_vshrn_n_s16(int16x8_t a) {
  return vshrn_n_s16(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK: ret <4 x i16> [[VSHRN_N]]
int16x4_t test_vshrn_n_s32(int32x4_t a) {
  return vshrn_n_s32(a, 9);
}

// CHECK-LABEL: define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 19, i64 19>
// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK: ret <2 x i32> [[VSHRN_N]]
int32x2_t test_vshrn_n_s64(int64x2_t a) {
  return vshrn_n_s64(a, 19);
}

// CHECK-LABEL: define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK: ret <8 x i8> [[VSHRN_N]]
uint8x8_t test_vshrn_n_u16(uint16x8_t a) {
  return vshrn_n_u16(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK: ret <4 x i16> [[VSHRN_N]]
uint16x4_t test_vshrn_n_u32(uint32x4_t a) {
  return vshrn_n_u32(a, 9);
}

// CHECK-LABEL: define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 19, i64 19>
// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK: ret <2 x i32> [[VSHRN_N]]
uint32x2_t test_vshrn_n_u64(uint64x2_t a) {
  return vshrn_n_u64(a, 19);
}
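
// vshrn_high_n: as vshrn_n, but the narrowed lanes form the upper half of a
// 128-bit result whose lower half is taken from the first operand, hence the
// concatenating shufflevector.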
// CHECK-LABEL: define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vshrn_high_n_s16(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vshrn_high_n_s32(a, b, 9);
}

// CHECK-LABEL: define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 19, i64 19>
// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vshrn_high_n_s64(a, b, 19);
}

// CHECK-LABEL: define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vshrn_high_n_u16(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vshrn_high_n_u32(a, b, 9);
}

// CHECK-LABEL: define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 19, i64 19>
// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vshrn_high_n_u64(a, b, 19);
}
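
// vqshrun_n: signed saturating shift right with unsigned narrow (sqshrun);
// results outside the unsigned half-width range are clamped.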
// CHECK-LABEL: define <8 x i8> @test_vqshrun_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
// CHECK: ret <8 x i8> [[VQSHRUN_N1]]
int8x8_t test_vqshrun_n_s16(int16x8_t a) {
  return vqshrun_n_s16(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vqshrun_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
// CHECK: ret <4 x i16> [[VQSHRUN_N1]]
int16x4_t test_vqshrun_n_s32(int32x4_t a) {
  return vqshrun_n_s32(a, 9);
}

// CHECK-LABEL: define <2 x i32> @test_vqshrun_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
// CHECK: ret <2 x i32> [[VQSHRUN_N1]]
int32x2_t test_vqshrun_n_s64(int64x2_t a) {
  return vqshrun_n_s64(a, 19);
}
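
// The _high form of vqshrun narrows into the upper half of the result,
// concatenated with the first operand.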
// CHECK-LABEL: define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqshrun_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqshrun_high_n_s16(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqshrun_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqshrun_high_n_s32(a, b, 9);
}

// CHECK-LABEL: define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqshrun_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqshrun_high_n_s64(a, b, 19);
}
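
// vrshrn_n: rounding shift right and narrow (rshrn); unlike vrshr, the shift
// amount is passed to the intrinsic as an i32 immediate rather than a splat
// vector.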
// CHECK-LABEL: define <8 x i8> @test_vrshrn_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
// CHECK: ret <8 x i8> [[VRSHRN_N1]]
int8x8_t test_vrshrn_n_s16(int16x8_t a) {
  return vrshrn_n_s16(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vrshrn_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
// CHECK: ret <4 x i16> [[VRSHRN_N1]]
int16x4_t test_vrshrn_n_s32(int32x4_t a) {
  return vrshrn_n_s32(a, 9);
}

// CHECK-LABEL: define <2 x i32> @test_vrshrn_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
// CHECK: ret <2 x i32> [[VRSHRN_N1]]
int32x2_t test_vrshrn_n_s64(int64x2_t a) {
  return vrshrn_n_s64(a, 19);
}

// CHECK-LABEL: define <8 x i8> @test_vrshrn_n_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
// CHECK: ret <8 x i8> [[VRSHRN_N1]]
uint8x8_t test_vrshrn_n_u16(uint16x8_t a) {
  return vrshrn_n_u16(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vrshrn_n_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
// CHECK: ret <4 x i16> [[VRSHRN_N1]]
uint16x4_t test_vrshrn_n_u32(uint32x4_t a) {
  return vrshrn_n_u32(a, 9);
}

// CHECK-LABEL: define <2 x i32> @test_vrshrn_n_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
// CHECK: ret <2 x i32> [[VRSHRN_N1]]
uint32x2_t test_vrshrn_n_u64(uint64x2_t a) {
  return vrshrn_n_u64(a, 19);
}
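
// The _high form of vrshrn: round, narrow, and concatenate with the first
// operand via shufflevector.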
// CHECK-LABEL: define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vrshrn_high_n_s16(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vrshrn_high_n_s32(a, b, 9);
}

// CHECK-LABEL: define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vrshrn_high_n_s64(a, b, 19);
}

// CHECK-LABEL: define <16 x i8> @test_vrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vrshrn_high_n_u16(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vrshrn_high_n_u32(a, b, 9);
}

// CHECK-LABEL: define <4 x i32> @test_vrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vrshrn_high_n_u64(a, b, 19);
}
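
// vqrshrun_n: saturating rounding shift right with unsigned narrow (sqrshrun),
// combining the rounding of vrshrn with the unsigned saturation of vqshrun.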
// CHECK-LABEL: define <8 x i8> @test_vqrshrun_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
// CHECK: ret <8 x i8> [[VQRSHRUN_N1]]
int8x8_t test_vqrshrun_n_s16(int16x8_t a) {
  return vqrshrun_n_s16(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vqrshrun_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
// CHECK: ret <4 x i16> [[VQRSHRUN_N1]]
int16x4_t test_vqrshrun_n_s32(int32x4_t a) {
  return vqrshrun_n_s32(a, 9);
}

// CHECK-LABEL: define <2 x i32> @test_vqrshrun_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
// CHECK: ret <2 x i32> [[VQRSHRUN_N1]]
int32x2_t test_vqrshrun_n_s64(int64x2_t a) {
  return vqrshrun_n_s64(a, 19);
}
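
// The _high form of vqrshrun, narrowing into the upper half of the result.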
// CHECK-LABEL: define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqrshrun_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqrshrun_high_n_s16(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqrshrun_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqrshrun_high_n_s32(a, b, 9);
}

// CHECK-LABEL: define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqrshrun_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqrshrun_high_n_s64(a, b, 19);
}
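
// vqshrn_n: signed saturating shift right and narrow (sqshrn).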
// CHECK-LABEL: define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vrshrn_high_n_s16(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vrshrn_high_n_s32(a, b, 9);
}

// CHECK-LABEL: define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vrshrn_high_n_s64(a, b, 19);
}

// CHECK-LABEL: define <16 x i8> @test_vrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vrshrn_high_n_u16(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vrshrn_high_n_u32(a, b, 9);
}

// CHECK-LABEL: define <4 x i32> @test_vrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vrshrn_high_n_u64(a, b, 19);
}
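// vqrshrun_n: signed saturating rounding shift right unsigned narrow
// (A64 SQRSHRUN), via @llvm.aarch64.neon.sqrshrun.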
// CHECK-LABEL: define <8 x i8> @test_vqrshrun_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
// CHECK: ret <8 x i8> [[VQRSHRUN_N1]]
int8x8_t test_vqrshrun_n_s16(int16x8_t a) {
  return vqrshrun_n_s16(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vqrshrun_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
// CHECK: ret <4 x i16> [[VQRSHRUN_N1]]
int16x4_t test_vqrshrun_n_s32(int32x4_t a) {
  return vqrshrun_n_s32(a, 9);
}

// CHECK-LABEL: define <2 x i32> @test_vqrshrun_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
// CHECK: ret <2 x i32> [[VQRSHRUN_N1]]
int32x2_t test_vqrshrun_n_s64(int64x2_t a) {
  return vqrshrun_n_s64(a, 19);
}

// CHECK-LABEL: define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqrshrun_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqrshrun_high_n_s16(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqrshrun_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqrshrun_high_n_s32(a, b, 9);
}

// CHECK-LABEL: define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqrshrun_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqrshrun_high_n_s64(a, b, 19);
}
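// vqshrn_n: saturating shift right narrow; signed inputs lower to
// @llvm.aarch64.neon.sqshrn (SQSHRN), unsigned ones to uqshrn (UQSHRN).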
// CHECK-LABEL: define <8 x i8> @test_vqshrn_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
// CHECK: ret <8 x i8> [[VQSHRN_N1]]
int8x8_t test_vqshrn_n_s16(int16x8_t a) {
  return vqshrn_n_s16(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vqshrn_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
// CHECK: ret <4 x i16> [[VQSHRN_N1]]
int16x4_t test_vqshrn_n_s32(int32x4_t a) {
  return vqshrn_n_s32(a, 9);
}

// CHECK-LABEL: define <2 x i32> @test_vqshrn_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
// CHECK: ret <2 x i32> [[VQSHRN_N1]]
int32x2_t test_vqshrn_n_s64(int64x2_t a) {
  return vqshrn_n_s64(a, 19);
}

// CHECK-LABEL: define <8 x i8> @test_vqshrn_n_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
// CHECK: ret <8 x i8> [[VQSHRN_N1]]
uint8x8_t test_vqshrn_n_u16(uint16x8_t a) {
  return vqshrn_n_u16(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vqshrn_n_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
// CHECK: ret <4 x i16> [[VQSHRN_N1]]
uint16x4_t test_vqshrn_n_u32(uint32x4_t a) {
  return vqshrn_n_u32(a, 9);
}

// CHECK-LABEL: define <2 x i32> @test_vqshrn_n_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
// CHECK: ret <2 x i32> [[VQSHRN_N1]]
uint32x2_t test_vqshrn_n_u64(uint64x2_t a) {
  return vqshrn_n_u64(a, 19);
}
// CHECK-LABEL: define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqshrn_high_n_s16(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqshrn_high_n_s32(a, b, 9);
}

// CHECK-LABEL: define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqshrn_high_n_s64(a, b, 19);
}

// CHECK-LABEL: define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vqshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vqshrn_high_n_u16(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vqshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vqshrn_high_n_u32(a, b, 9);
}

// CHECK-LABEL: define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vqshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vqshrn_high_n_u64(a, b, 19);
}
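// vqrshrn_n: saturating rounding shift right narrow (SQRSHRN/UQRSHRN).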
// CHECK-LABEL: define <8 x i8> @test_vqrshrn_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
// CHECK: ret <8 x i8> [[VQRSHRN_N1]]
int8x8_t test_vqrshrn_n_s16(int16x8_t a) {
  return vqrshrn_n_s16(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vqrshrn_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
// CHECK: ret <4 x i16> [[VQRSHRN_N1]]
int16x4_t test_vqrshrn_n_s32(int32x4_t a) {
  return vqrshrn_n_s32(a, 9);
}

// CHECK-LABEL: define <2 x i32> @test_vqrshrn_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
// CHECK: ret <2 x i32> [[VQRSHRN_N1]]
int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
  return vqrshrn_n_s64(a, 19);
}

// CHECK-LABEL: define <8 x i8> @test_vqrshrn_n_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
// CHECK: ret <8 x i8> [[VQRSHRN_N1]]
uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) {
  return vqrshrn_n_u16(a, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vqrshrn_n_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
// CHECK: ret <4 x i16> [[VQRSHRN_N1]]
uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) {
  return vqrshrn_n_u32(a, 9);
}

// CHECK-LABEL: define <2 x i32> @test_vqrshrn_n_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
// CHECK: ret <2 x i32> [[VQRSHRN_N1]]
uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) {
  return vqrshrn_n_u64(a, 19);
}
// CHECK-LABEL: define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqrshrn_high_n_s16(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqrshrn_high_n_s32(a, b, 9);
}

// CHECK-LABEL: define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqrshrn_high_n_s64(a, b, 19);
}

// CHECK-LABEL: define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vqrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vqrshrn_high_n_u16(a, b, 3);
}

// CHECK-LABEL: define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vqrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vqrshrn_high_n_u32(a, b, 9);
}

// CHECK-LABEL: define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vqrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vqrshrn_high_n_u64(a, b, 19);
}
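// vshll_n: shift left long. No target intrinsic is needed here: the IR is a
// plain sext/zext to the wider element type followed by a vector shl by the
// splatted immediate.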
// CHECK-LABEL: define <8 x i16> @test_vshll_n_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK: ret <8 x i16> [[VSHLL_N]]
int16x8_t test_vshll_n_s8(int8x8_t a) {
  return vshll_n_s8(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vshll_n_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
// CHECK: ret <4 x i32> [[VSHLL_N]]
int32x4_t test_vshll_n_s16(int16x4_t a) {
  return vshll_n_s16(a, 9);
}

// CHECK-LABEL: define <2 x i64> @test_vshll_n_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
// CHECK: ret <2 x i64> [[VSHLL_N]]
int64x2_t test_vshll_n_s32(int32x2_t a) {
  return vshll_n_s32(a, 19);
}

// CHECK-LABEL: define <8 x i16> @test_vshll_n_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK: ret <8 x i16> [[VSHLL_N]]
uint16x8_t test_vshll_n_u8(uint8x8_t a) {
  return vshll_n_u8(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vshll_n_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
// CHECK: ret <4 x i32> [[VSHLL_N]]
uint32x4_t test_vshll_n_u16(uint16x4_t a) {
  return vshll_n_u16(a, 9);
}

// CHECK-LABEL: define <2 x i64> @test_vshll_n_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
// CHECK: ret <2 x i64> [[VSHLL_N]]
uint64x2_t test_vshll_n_u32(uint32x2_t a) {
  return vshll_n_u32(a, 19);
}

// CHECK-LABEL: define <8 x i16> @test_vshll_high_n_s8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK: ret <8 x i16> [[VSHLL_N]]
int16x8_t test_vshll_high_n_s8(int8x16_t a) {
  return vshll_high_n_s8(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vshll_high_n_s16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
// CHECK: ret <4 x i32> [[VSHLL_N]]
int32x4_t test_vshll_high_n_s16(int16x8_t a) {
  return vshll_high_n_s16(a, 9);
}

// CHECK-LABEL: define <2 x i64> @test_vshll_high_n_s32(<4 x i32> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
// CHECK: ret <2 x i64> [[VSHLL_N]]
int64x2_t test_vshll_high_n_s32(int32x4_t a) {
  return vshll_high_n_s32(a, 19);
}

// CHECK-LABEL: define <8 x i16> @test_vshll_high_n_u8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
// CHECK: ret <8 x i16> [[VSHLL_N]]
uint16x8_t test_vshll_high_n_u8(uint8x16_t a) {
  return vshll_high_n_u8(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vshll_high_n_u16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
// CHECK: ret <4 x i32> [[VSHLL_N]]
uint32x4_t test_vshll_high_n_u16(uint16x8_t a) {
  return vshll_high_n_u16(a, 9);
}

// CHECK-LABEL: define <2 x i64> @test_vshll_high_n_u32(<4 x i32> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
// CHECK: ret <2 x i64> [[VSHLL_N]]
uint64x2_t test_vshll_high_n_u32(uint32x4_t a) {
  return vshll_high_n_u32(a, 19);
}
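// vmovl: lengthening move, i.e. a pure sign/zero extension; the _high forms
// first extract the upper half with a shufflevector.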
// CHECK-LABEL: define <8 x i16> @test_vmovl_s8(<8 x i8> %a) #0 {
// CHECK: [[VMOVL_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I]]
int16x8_t test_vmovl_s8(int8x8_t a) {
  return vmovl_s8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vmovl_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I]]
int32x4_t test_vmovl_s16(int16x4_t a) {
  return vmovl_s16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vmovl_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I]]
int64x2_t test_vmovl_s32(int32x2_t a) {
  return vmovl_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vmovl_u8(<8 x i8> %a) #0 {
// CHECK: [[VMOVL_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I]]
uint16x8_t test_vmovl_u8(uint8x8_t a) {
  return vmovl_u8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vmovl_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I]]
uint32x4_t test_vmovl_u16(uint16x4_t a) {
  return vmovl_u16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vmovl_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I]]
uint64x2_t test_vmovl_u32(uint32x2_t a) {
  return vmovl_u32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vmovl_high_s8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vmovl_high_s8(int8x16_t a) {
  return vmovl_high_s8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vmovl_high_s16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vmovl_high_s16(int16x8_t a) {
  return vmovl_high_s16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vmovl_high_s32(<4 x i32> %a) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vmovl_high_s32(int32x4_t a) {
  return vmovl_high_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vmovl_high_u8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vmovl_high_u8(uint8x16_t a) {
  return vmovl_high_u8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vmovl_high_u16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vmovl_high_u16(uint16x8_t a) {
  return vmovl_high_u16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vmovl_high_u32(<4 x i32> %a) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vmovl_high_u32(uint32x4_t a) {
  return vmovl_high_u32(a);
}
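// vcvt_n_*: fixed-point conversions with an immediate count of fractional
// bits. This block covers the integer-to-float direction via
// @llvm.aarch64.neon.vcvtfxs2fp/vcvtfxu2fp (A64 SCVTF/UCVTF #n).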
// CHECK-LABEL: define <2 x float> @test_vcvt_n_f32_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
// CHECK: ret <2 x float> [[VCVT_N1]]
float32x2_t test_vcvt_n_f32_s32(int32x2_t a) {
  return vcvt_n_f32_s32(a, 31);
}

// CHECK-LABEL: define <4 x float> @test_vcvtq_n_f32_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
// CHECK: ret <4 x float> [[VCVT_N1]]
float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) {
  return vcvtq_n_f32_s32(a, 31);
}

// CHECK-LABEL: define <2 x double> @test_vcvtq_n_f64_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
// CHECK: ret <2 x double> [[VCVT_N1]]
float64x2_t test_vcvtq_n_f64_s64(int64x2_t a) {
  return vcvtq_n_f64_s64(a, 50);
}

// CHECK-LABEL: define <2 x float> @test_vcvt_n_f32_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
// CHECK: ret <2 x float> [[VCVT_N1]]
float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) {
  return vcvt_n_f32_u32(a, 31);
}

// CHECK-LABEL: define <4 x float> @test_vcvtq_n_f32_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
// CHECK: ret <4 x float> [[VCVT_N1]]
float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) {
  return vcvtq_n_f32_u32(a, 31);
}

// CHECK-LABEL: define <2 x double> @test_vcvtq_n_f64_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
// CHECK: ret <2 x double> [[VCVT_N1]]
float64x2_t test_vcvtq_n_f64_u64(uint64x2_t a) {
  return vcvtq_n_f64_u64(a, 50);
}
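// Float-to-fixed-point direction: @llvm.aarch64.neon.vcvtfp2fxs/vcvtfp2fxu
// (A64 FCVTZS/FCVTZU #n), carrying the same fractional-bits immediate.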
// CHECK-LABEL: define <2 x i32> @test_vcvt_n_s32_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
// CHECK: ret <2 x i32> [[VCVT_N1]]
int32x2_t test_vcvt_n_s32_f32(float32x2_t a) {
  return vcvt_n_s32_f32(a, 31);
}

// CHECK-LABEL: define <4 x i32> @test_vcvtq_n_s32_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
// CHECK: ret <4 x i32> [[VCVT_N1]]
int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) {
  return vcvtq_n_s32_f32(a, 31);
}

// CHECK-LABEL: define <2 x i64> @test_vcvtq_n_s64_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
// CHECK: ret <2 x i64> [[VCVT_N1]]
int64x2_t test_vcvtq_n_s64_f64(float64x2_t a) {
  return vcvtq_n_s64_f64(a, 50);
}

// CHECK-LABEL: define <2 x i32> @test_vcvt_n_u32_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
// CHECK: ret <2 x i32> [[VCVT_N1]]
uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) {
  return vcvt_n_u32_f32(a, 31);
}

// CHECK-LABEL: define <4 x i32> @test_vcvtq_n_u32_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
// CHECK: ret <4 x i32> [[VCVT_N1]]
uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) {
  return vcvtq_n_u32_f32(a, 31);
}

// CHECK-LABEL: define <2 x i64> @test_vcvtq_n_u64_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
// CHECK: ret <2 x i64> [[VCVT_N1]]
uint64x2_t test_vcvtq_n_u64_f64(float64x2_t a) {
  return vcvtq_n_u64_f64(a, 50);
}
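// vaddl/vaddw: widening add (SADDL/UADDL, SADDW/UADDW). The IR widens the
// narrow operand(s) with sext/zext and then emits an ordinary vector add.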
// CHECK-LABEL: define <8 x i16> @test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) {
  return vaddl_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) {
  return vaddl_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) {
  return vaddl_s32(a, b);
}
// CHECK-LABEL: define <8 x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) {
  return vaddl_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) {
  return vaddl_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) {
  return vaddl_u32(a, b);
}
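// The *_high widening forms take two 128-bit inputs and widen only their
// upper halves (shufflevector selecting the top lanes) before the add.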
// CHECK-LABEL: define <8 x i16> @test_vaddl_high_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddl_high_s8(int8x16_t a, int8x16_t b) {
  return vaddl_high_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddl_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
// CHECK: [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP5]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddl_high_s16(int16x8_t a, int16x8_t b) {
  return vaddl_high_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddl_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
// CHECK: [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP5]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddl_high_s32(int32x4_t a, int32x4_t b) {
  return vaddl_high_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vaddl_high_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddl_high_u8(uint8x16_t a, uint8x16_t b) {
  return vaddl_high_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddl_high_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
// CHECK: [[TMP5:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP5]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddl_high_u16(uint16x8_t a, uint16x8_t b) {
  return vaddl_high_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddl_high_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
// CHECK: [[TMP5:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP5]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddl_high_u32(uint32x4_t a, uint32x4_t b) {
  return vaddl_high_u32(a, b);
}
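// vaddw: add wide; only the second operand is widened before the add.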
// CHECK-LABEL: define <8 x i16> @test_vaddw_s8(<8 x i16> %a, <8 x i8> %b) #0 {
// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) {
  return vaddw_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddw_s16(<4 x i32> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) {
  return vaddw_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddw_s32(<2 x i64> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) {
  return vaddw_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vaddw_u8(<8 x i16> %a, <8 x i8> %b) #0 {
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) {
  return vaddw_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddw_u16(<4 x i32> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) {
  return vaddw_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddw_u32(<2 x i64> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) {
  return vaddw_u32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vaddw_high_s8(<8 x i16> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddw_high_s8(int16x8_t a, int8x16_t b) {
  return vaddw_high_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddw_high_s16(<4 x i32> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP2]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddw_high_s16(int32x4_t a, int16x8_t b) {
  return vaddw_high_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddw_high_s32(<2 x i64> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP2]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddw_high_s32(int64x2_t a, int32x4_t b) {
  return vaddw_high_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vaddw_high_u8(<8 x i16> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddw_high_u8(uint16x8_t a, uint8x16_t b) {
  return vaddw_high_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddw_high_u16(<4 x i32> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP2]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddw_high_u16(uint32x4_t a, uint16x8_t b) {
  return vaddw_high_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddw_high_u32(<2 x i64> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP2]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddw_high_u32(uint64x2_t a, uint32x4_t b) {
  return vaddw_high_u32(a, b);
}
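// vsubl/vsubw: the widening subtract counterparts (SSUBL/USUBL, SSUBW/USUBW),
// identical in structure to the adds, with sub in place of add.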
// CHECK-LABEL: define <8 x i16> @test_vsubl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) {
  return vsubl_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) {
  return vsubl_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) {
  return vsubl_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsubl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) {
  return vsubl_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) {
  return vsubl_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) {
  return vsubl_u32(a, b);
}
// CHECK-LABEL: define <8 x i16> @test_vsubl_high_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubl_high_s8(int8x16_t a, int8x16_t b) {
  return vsubl_high_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubl_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
// CHECK: [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP5]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubl_high_s16(int16x8_t a, int16x8_t b) {
  return vsubl_high_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubl_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
// CHECK: [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP5]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubl_high_s32(int32x4_t a, int32x4_t b) {
  return vsubl_high_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsubl_high_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubl_high_u8(uint8x16_t a, uint8x16_t b) {
  return vsubl_high_u8(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vsubl_high_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
// CHECK: [[TMP5:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP5]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubl_high_u16(uint16x8_t a, uint16x8_t b) {
  return vsubl_high_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubl_high_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
// CHECK: [[TMP5:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP5]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubl_high_u32(uint32x4_t a, uint32x4_t b) {
  return vsubl_high_u32(a, b);
}
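// vsubw: subtract wide; only the second operand is widened.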
// CHECK-LABEL: define <8 x i16> @test_vsubw_s8(<8 x i16> %a, <8 x i8> %b) #0 {
// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) {
  return vsubw_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubw_s16(<4 x i32> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) {
  return vsubw_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubw_s32(<2 x i64> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) {
  return vsubw_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsubw_u8(<8 x i16> %a, <8 x i8> %b) #0 {
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) {
  return vsubw_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubw_u16(<4 x i32> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) {
  return vsubw_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubw_u32(<2 x i64> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) {
  return vsubw_u32(a, b);
}

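// vsubw_high_<t>: the same extend-and-subtract pattern, except the narrow
// operand is first taken from the upper half of a 128-bit vector via a
// shufflevector of the high lanes.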
// CHECK-LABEL: define <8 x i16> @test_vsubw_high_s8(<8 x i16> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubw_high_s8(int16x8_t a, int8x16_t b) {
  return vsubw_high_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubw_high_s16(<4 x i32> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP2]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubw_high_s16(int32x4_t a, int16x8_t b) {
  return vsubw_high_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubw_high_s32(<2 x i64> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP2]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubw_high_s32(int64x2_t a, int32x4_t b) {
  return vsubw_high_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsubw_high_u8(<8 x i16> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubw_high_u8(uint16x8_t a, uint8x16_t b) {
  return vsubw_high_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubw_high_u16(<4 x i32> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP2]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubw_high_u16(uint32x4_t a, uint16x8_t b) {
  return vsubw_high_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsubw_high_u32(<2 x i64> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP2]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) {
  return vsubw_high_u32(a, b);
}

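// vaddhn_<t> ("add, halving narrow"): the wide lanes are added, shifted
// right logically by half the lane width (8, 16 or 32 bits), and truncated.
// A scalar sketch of one s16 lane: (int8_t)((uint16_t)(a + b) >> 8).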
// CHECK-LABEL: define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[VADDHN2_I]]
int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
  return vaddhn_s16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[VADDHN2_I]]
int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) {
  return vaddhn_s32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
// CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[VADDHN2_I]]
int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) {
  return vaddhn_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[VADDHN2_I]]
uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) {
  return vaddhn_u16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[VADDHN2_I]]
uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vaddhn_u32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
// CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[VADDHN2_I]]
uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vaddhn_u64(a, b);
}

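// vaddhn_high_<t>: identical add/lshr/trunc lowering, followed by a
// shufflevector that concatenates the existing narrow vector %r with the
// freshly narrowed result into a single 128-bit vector.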
// CHECK-LABEL: define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VADDHN_I_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vaddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vaddhn_high_s16(r, a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VADDHN_I_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vaddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vaddhn_high_s32(r, a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VADDHN_I_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], <i64 32, i64 32>
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vaddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vaddhn_high_s64(r, a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VADDHN_I_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vaddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vaddhn_high_u16(r, a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VADDHN_I_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vaddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vaddhn_high_u32(r, a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VADDHN_I_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], <i64 32, i64 32>
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vaddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vaddhn_high_u64(r, a, b);
}

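// vraddhn_<t>: the rounding variant of vaddhn. Rounding is not expressible
// as a simple shift-and-truncate, so the lowering is a call to the
// llvm.aarch64.neon.raddhn.* intrinsic rather than generic IR.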
// CHECK-LABEL: define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I]], <8 x i16> [[VRADDHN_V1_I]]) #4
// CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) {
  return vraddhn_s16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I]], <4 x i32> [[VRADDHN_V1_I]]) #4
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) {
  return vraddhn_s32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I]], <2 x i64> [[VRADDHN_V1_I]]) #4
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) {
  return vraddhn_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I]], <8 x i16> [[VRADDHN_V1_I]]) #4
// CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) {
  return vraddhn_u16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I]], <4 x i32> [[VRADDHN_V1_I]]) #4
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vraddhn_u32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I]], <2 x i64> [[VRADDHN_V1_I]]) #4
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vraddhn_u64(a, b);
}

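// vraddhn_high_<t>: the raddhn intrinsic call followed by the concatenating
// shufflevector with %r, mirroring the vaddhn_high tests above.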
// CHECK-LABEL: define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I_I]], <8 x i16> [[VRADDHN_V1_I_I]]) #4
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vraddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vraddhn_high_s16(r, a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I_I]], <4 x i32> [[VRADDHN_V1_I_I]]) #4
// CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <4 x i16>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vraddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vraddhn_high_s32(r, a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I_I]], <2 x i64> [[VRADDHN_V1_I_I]]) #4
// CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <2 x i32>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vraddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vraddhn_high_s64(r, a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[VRADDHN_V_I_I]], <8 x i16> [[VRADDHN_V1_I_I]]) #4
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vraddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vraddhn_high_u16(r, a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[VRADDHN_V_I_I]], <4 x i32> [[VRADDHN_V1_I_I]]) #4
// CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <4 x i16>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vraddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vraddhn_high_u32(r, a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRADDHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[VRADDHN_V_I_I]], <2 x i64> [[VRADDHN_V1_I_I]]) #4
// CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I_I]] to <2 x i32>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vraddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vraddhn_high_u64(r, a, b);
}

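// vsubhn_<t>: as vaddhn but with a wide 'sub'; one s16 lane is roughly
// (int8_t)((uint16_t)(a - b) >> 8).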
// CHECK-LABEL: define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[VSUBHN2_I]]
int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) {
  return vsubhn_s16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[VSUBHN2_I]]
int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) {
  return vsubhn_s32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
// CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[VSUBHN2_I]]
int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) {
  return vsubhn_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[VSUBHN2_I]]
uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vsubhn_u16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[VSUBHN2_I]]
uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vsubhn_u32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
// CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[VSUBHN2_I]]
uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vsubhn_u64(a, b);
}

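// vsubhn_high_<t>: sub/lshr/trunc plus the shufflevector that appends the
// narrowed difference to %r.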
// CHECK-LABEL: define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSUBHN_I_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vsubhn_high_s16(r, a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSUBHN_I_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vsubhn_high_s32(r, a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSUBHN_I_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], <i64 32, i64 32>
// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vsubhn_high_s64(r, a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSUBHN_I_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vsubhn_high_u16(r, a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSUBHN_I_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vsubhn_high_u32(r, a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSUBHN_I_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], <i64 32, i64 32>
// CHECK: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vsubhn_high_u64(r, a, b);
}

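// vrsubhn_<t>: rounding subtract-halving-narrow, lowered to the
// llvm.aarch64.neon.rsubhn.* intrinsic.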
// CHECK-LABEL: define <8 x i8> @test_vrsubhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I]], <8 x i16> [[VRSUBHN_V1_I]]) #4
// CHECK: ret <8 x i8> [[VRSUBHN_V2_I]]
int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) {
  return vrsubhn_s16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vrsubhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I]], <4 x i32> [[VRSUBHN_V1_I]]) #4
// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) {
  return vrsubhn_s32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vrsubhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I]], <2 x i64> [[VRSUBHN_V1_I]]) #4
// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) {
  return vrsubhn_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vrsubhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I]], <8 x i16> [[VRSUBHN_V1_I]]) #4
// CHECK: ret <8 x i8> [[VRSUBHN_V2_I]]
uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vrsubhn_u16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vrsubhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I]], <4 x i32> [[VRSUBHN_V1_I]]) #4
// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vrsubhn_u32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vrsubhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I]], <2 x i64> [[VRSUBHN_V1_I]]) #4
// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vrsubhn_u64(a, b);
}

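// vrsubhn_high_<t>: the rsubhn intrinsic plus the concatenating
// shufflevector with %r.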
// CHECK-LABEL: define <16 x i8> @test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I_I]], <8 x i16> [[VRSUBHN_V1_I_I]]) #4
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vrsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vrsubhn_high_s16(r, a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I_I]], <4 x i32> [[VRSUBHN_V1_I_I]]) #4
// CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <4 x i16>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vrsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vrsubhn_high_s32(r, a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I_I]], <2 x i64> [[VRSUBHN_V1_I_I]]) #4
// CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <2 x i32>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vrsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vrsubhn_high_s64(r, a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I_I]], <8 x i16> [[VRSUBHN_V1_I_I]]) #4
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vrsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vrsubhn_high_u16(r, a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I_I]], <4 x i32> [[VRSUBHN_V1_I_I]]) #4
// CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <4 x i16>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vrsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vrsubhn_high_u32(r, a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I_I]], <2 x i64> [[VRSUBHN_V1_I_I]]) #4
// CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <2 x i32>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vrsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vrsubhn_high_u64(r, a, b);
}

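// vabdl_<t> ("absolute difference, long"): a call to the signed or unsigned
// llvm.aarch64.neon.[su]abd.* intrinsic, then a zext to the doubled lane
// width. zext is correct even for the signed variants because an absolute
// difference is always non-negative.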
// CHECK-LABEL: define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I_I]]
int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) {
  return vabdl_s8(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I_I]]
int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) {
  return vabdl_s16(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I_I]]
int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) {
  return vabdl_s32(a, b);
}
// CHECK-LABEL: define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I_I]]
uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) {
  return vabdl_u8(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I_I]]
uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) {
  return vabdl_u16(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I_I]]
uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) {
  return vabdl_u32(a, b);
}

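// vabal_<t>: vabdl plus accumulation; the widened absolute difference feeds
// a plain 'add' with the accumulator operand %a.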
// CHECK-LABEL: define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vabal_s8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vabal_s16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vabal_s32(a, b, c);
}
// CHECK-LABEL: define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vabal_u8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vabal_u16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vabal_u32(a, b, c);
}

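// vabdl_high_<t>: vabdl on the upper halves of 128-bit inputs, so
// shufflevectors extract the high lanes before the [su]abd call.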
// CHECK-LABEL: define <8 x i16> @test_vabdl_high_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I_I_I]]
int16x8_t test_vabdl_high_s8(int8x16_t a, int8x16_t b) {
  return vabdl_high_s8(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vabdl_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I_I_I]]
int32x4_t test_vabdl_high_s16(int16x8_t a, int16x8_t b) {
  return vabdl_high_s16(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vabdl_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I_I_I]]
int64x2_t test_vabdl_high_s32(int32x4_t a, int32x4_t b) {
  return vabdl_high_s32(a, b);
}
// CHECK-LABEL: define <8 x i16> @test_vabdl_high_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I_I_I]]
uint16x8_t test_vabdl_high_u8(uint8x16_t a, uint8x16_t b) {
  return vabdl_high_u8(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vabdl_high_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I_I_I]]
uint32x4_t test_vabdl_high_u16(uint16x8_t a, uint16x8_t b) {
  return vabdl_high_u16(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vabdl_high_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I_I_I]]
uint64x2_t test_vabdl_high_u32(uint32x4_t a, uint32x4_t b) {
  return vabdl_high_u32(a, b);
}

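// vabal_high_<t>: high-half absolute difference, widened with zext and
// accumulated into %a.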
CHECK-LABEL: define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) #0 { 8369 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 8370 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 8371 // CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 8372 // CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 8373 // CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I_I]], <8 x i16> [[VRSUBHN_V1_I_I]]) #4 8374 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 8375 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] 8376 uint8x16_t test_vrsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) { 8377 return vrsubhn_high_u16(r, a, b); 8378 } 8379 8380 // CHECK-LABEL: define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) #0 { 8381 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 8382 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 8383 // CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 8384 // CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 8385 // CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I_I]], <4 x i32> [[VRSUBHN_V1_I_I]]) #4 8386 // CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8> 8387 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <4 x i16> 8388 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 8389 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] 8390 uint16x8_t test_vrsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) { 8391 return vrsubhn_high_u32(r, a, b); 8392 } 8393 8394 // CHECK-LABEL: define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) #0 { 8395 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 8396 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 8397 // CHECK: [[VRSUBHN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 8398 // CHECK: [[VRSUBHN_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 8399 // CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I_I]], <2 x i64> [[VRSUBHN_V1_I_I]]) #4 8400 // CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8> 8401 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I_I]] to <2 x i32> 8402 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 8403 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] 8404 uint32x4_t test_vrsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) { 8405 return vrsubhn_high_u64(r, a, b); 8406 } 8407 8408 // CHECK-LABEL: define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) #0 { 8409 // CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b) #4 8410 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16> 8411 // CHECK: ret <8 x i16> [[VMOVL_I_I]] 8412 int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) { 8413 return vabdl_s8(a, b); 8414 } 8415 // CHECK-LABEL: define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) #0 { 8416 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 8417 // CHECK: 
[[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 8418 // CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 8419 // CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 8420 // CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4 8421 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8> 8422 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 8423 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> 8424 // CHECK: ret <4 x i32> [[VMOVL_I_I]] 8425 int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) { 8426 return vabdl_s16(a, b); 8427 } 8428 // CHECK-LABEL: define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) #0 { 8429 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 8430 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 8431 // CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 8432 // CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 8433 // CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4 8434 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8> 8435 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 8436 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> 8437 // CHECK: ret <2 x i64> [[VMOVL_I_I]] 8438 int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) { 8439 return vabdl_s32(a, b); 8440 } 8441 // CHECK-LABEL: define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) #0 { 8442 // CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b) #4 8443 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16> 8444 // CHECK: ret <8 x i16> [[VMOVL_I_I]] 8445 uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) { 8446 return vabdl_u8(a, b); 8447 } 8448 // CHECK-LABEL: define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) #0 { 8449 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 8450 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 8451 // CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 8452 // CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 8453 // CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I]], <4 x i16> [[VABD1_I_I]]) #4 8454 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8> 8455 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 8456 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> 8457 // CHECK: ret <4 x i32> [[VMOVL_I_I]] 8458 uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) { 8459 return vabdl_u16(a, b); 8460 } 8461 // CHECK-LABEL: define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) #0 { 8462 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 8463 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 8464 // CHECK: [[VABD_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 8465 // CHECK: [[VABD1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 8466 // CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I]], <2 x i32> [[VABD1_I_I]]) #4 8467 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8> 8468 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 8469 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> 8470 // CHECK: ret <2 x i64> [[VMOVL_I_I]] 8471 uint64x2_t 
test_vabdl_u32(uint32x2_t a, uint32x2_t b) { 8472 return vabdl_u32(a, b); 8473 } 8474 8475 // CHECK-LABEL: define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 { 8476 // CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c) #4 8477 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16> 8478 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]] 8479 // CHECK: ret <8 x i16> [[ADD_I]] 8480 int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) { 8481 return vabal_s8(a, b, c); 8482 } 8483 // CHECK-LABEL: define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { 8484 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 8485 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> 8486 // CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 8487 // CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 8488 // CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4 8489 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8> 8490 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 8491 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> 8492 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]] 8493 // CHECK: ret <4 x i32> [[ADD_I]] 8494 int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { 8495 return vabal_s16(a, b, c); 8496 } 8497 // CHECK-LABEL: define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { 8498 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 8499 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> 8500 // CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 8501 // CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 8502 // CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4 8503 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8> 8504 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 8505 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> 8506 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]] 8507 // CHECK: ret <2 x i64> [[ADD_I]] 8508 int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { 8509 return vabal_s32(a, b, c); 8510 } 8511 // CHECK-LABEL: define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 { 8512 // CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c) #4 8513 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16> 8514 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]] 8515 // CHECK: ret <8 x i16> [[ADD_I]] 8516 uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { 8517 return vabal_u8(a, b, c); 8518 } 8519 // CHECK-LABEL: define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { 8520 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 8521 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> 8522 // CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 8523 // CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 8524 // CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4 8525 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> 
// CHECK-LABEL: define <8 x i16> @test_vabdl_high_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I_I_I]]
int16x8_t test_vabdl_high_s8(int8x16_t a, int8x16_t b) {
  return vabdl_high_s8(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vabdl_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I_I_I]]
int32x4_t test_vabdl_high_s16(int16x8_t a, int16x8_t b) {
  return vabdl_high_s16(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vabdl_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I_I_I]]
int64x2_t test_vabdl_high_s32(int32x4_t a, int32x4_t b) {
  return vabdl_high_s32(a, b);
}
// CHECK-LABEL: define <8 x i16> @test_vabdl_high_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I_I_I]]
uint16x8_t test_vabdl_high_u8(uint8x16_t a, uint8x16_t b) {
  return vabdl_high_u8(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vabdl_high_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I_I]], <4 x i16> [[VABD1_I_I_I]]) #4
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I_I_I]]
uint32x4_t test_vabdl_high_u16(uint16x8_t a, uint16x8_t b) {
  return vabdl_high_u16(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vabdl_high_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VABD_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I_I]], <2 x i32> [[VABD1_I_I_I]]) #4
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I_I_I]]
uint64x2_t test_vabdl_high_u32(uint32x4_t a, uint32x4_t b) {
  return vabdl_high_u32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vabal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
// CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
// CHECK: ret <8 x i16> [[ADD_I_I]]
int16x8_t test_vabal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vabal_high_s8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vabal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I_I_I_I]], <4 x i16> [[VABD1_I_I_I_I]]) #4
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
// CHECK: ret <4 x i32> [[ADD_I_I]]
int32x4_t test_vabal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vabal_high_s16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vabal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I_I_I_I]], <2 x i32> [[VABD1_I_I_I_I]]) #4
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
// CHECK: ret <2 x i64> [[ADD_I_I]]
int64x2_t test_vabal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vabal_high_s32(a, b, c);
}
// CHECK-LABEL: define <8 x i16> @test_vabal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
// CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
// CHECK: ret <8 x i16> [[ADD_I_I]]
uint16x8_t test_vabal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vabal_high_u8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vabal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I_I_I_I]], <4 x i16> [[VABD1_I_I_I_I]]) #4
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
// CHECK: ret <4 x i32> [[ADD_I_I]]
uint32x4_t test_vabal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vabal_high_u16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vabal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VABD_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD1_I_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I_I_I_I]], <2 x i32> [[VABD1_I_I_I_I]]) #4
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
// CHECK: ret <2 x i64> [[ADD_I_I]]
uint64x2_t test_vabal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vabal_high_u32(a, b, c);
}

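// vmull: widening multiply. The byte forms call smull/umull directly; the
// halfword and word forms go through the usual bitcast-to-<8 x i8> step first.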
// CHECK-LABEL: define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i16> [[VMULL_I]]
int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) {
  return vmull_s8(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
// CHECK: ret <4 x i32> [[VMULL2_I]]
int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) {
  return vmull_s16(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
// CHECK: ret <2 x i64> [[VMULL2_I]]
int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) {
  return vmull_s32(a, b);
}
// CHECK-LABEL: define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i16> [[VMULL_I]]
uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) {
  return vmull_u8(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
// CHECK: ret <4 x i32> [[VMULL2_I]]
uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) {
  return vmull_u16(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
// CHECK: ret <2 x i64> [[VMULL2_I]]
uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) {
  return vmull_u32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vmull_high_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK: ret <8 x i16> [[VMULL_I_I]]
int16x8_t test_vmull_high_s8(int8x16_t a, int8x16_t b) {
  return vmull_high_s8(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vmull_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: ret <4 x i32> [[VMULL2_I_I]]
int32x4_t test_vmull_high_s16(int16x8_t a, int16x8_t b) {
  return vmull_high_s16(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vmull_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: ret <2 x i64> [[VMULL2_I_I]]
int64x2_t test_vmull_high_s32(int32x4_t a, int32x4_t b) {
  return vmull_high_s32(a, b);
}
// CHECK-LABEL: define <8 x i16> @test_vmull_high_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK: ret <8 x i16> [[VMULL_I_I]]
uint16x8_t test_vmull_high_u8(uint8x16_t a, uint8x16_t b) {
  return vmull_high_u8(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vmull_high_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: ret <4 x i32> [[VMULL2_I_I]]
uint32x4_t test_vmull_high_u16(uint16x8_t a, uint16x8_t b) {
  return vmull_high_u16(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vmull_high_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: ret <2 x i64> [[VMULL2_I_I]]
uint64x2_t test_vmull_high_u32(uint32x4_t a, uint32x4_t b) {
  return vmull_high_u32(a, b);
}

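// vmlal: widening multiply-accumulate, lowered to smull/umull followed by a
// plain vector add.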
// CHECK-LABEL: define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlal_s8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlal_s16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlal_s32(a, b, c);
}
// CHECK-LABEL: define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlal_u8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlal_u16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlal_u32(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
// CHECK: ret <8 x i16> [[ADD_I_I]]
int16x8_t test_vmlal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vmlal_high_s8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #4
// CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
// CHECK: ret <4 x i32> [[ADD_I_I]]
int32x4_t test_vmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vmlal_high_s16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #4
// CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
// CHECK: ret <2 x i64> [[ADD_I_I]]
int64x2_t test_vmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vmlal_high_s32(a, b, c);
}
// CHECK-LABEL: define <8 x i16> @test_vmlal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
// CHECK: ret <8 x i16> [[ADD_I_I]]
uint16x8_t test_vmlal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vmlal_high_u8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vmlal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #4
// CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
// CHECK: ret <4 x i32> [[ADD_I_I]]
uint32x4_t test_vmlal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vmlal_high_u16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vmlal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #4
// CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
// CHECK: ret <2 x i64> [[ADD_I_I]]
uint64x2_t test_vmlal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vmlal_high_u32(a, b, c);
}

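// vmlsl: widening multiply-subtract; same lowering as vmlal but with sub.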
// CHECK-LABEL: define <8 x i16> @test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlsl_s8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlsl_s16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlsl_s32(a, b, c);
}
// CHECK-LABEL: define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlsl_u8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlsl_u16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlsl_u32(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlsl_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK: [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
// CHECK: ret <8 x i16> [[SUB_I_I]]
int16x8_t test_vmlsl_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vmlsl_high_s8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #4
// CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
// CHECK: ret <4 x i32> [[SUB_I_I]]
int32x4_t test_vmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vmlsl_high_s16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #4
// CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
// CHECK: ret <2 x i64> [[SUB_I_I]]
int64x2_t test_vmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vmlsl_high_s32(a, b, c);
}
// CHECK-LABEL: define <8 x i16> @test_vmlsl_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK: [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
// CHECK: ret <8 x i16> [[SUB_I_I]]
uint16x8_t test_vmlsl_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vmlsl_high_u8(a, b, c);
}
// CHECK-LABEL: define <4 x i32> @test_vmlsl_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[VMULL_I_I_I]], <4 x i16> [[VMULL1_I_I_I]]) #4
// CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
// CHECK: ret <4 x i32> [[SUB_I_I]]
uint32x4_t test_vmlsl_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vmlsl_high_u16(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vmlsl_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[VMULL_I_I_I]], <2 x i32> [[VMULL1_I_I_I]]) #4
// CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
// CHECK: ret <2 x i64> [[SUB_I_I]]
uint64x2_t test_vmlsl_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vmlsl_high_u32(a, b, c);
}

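// vqdmull: saturating doubling widening multiply (sqdmull); the vqdmlal and
// vqdmlsl forms combine it with the saturating sqadd/sqsub intrinsics.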
// CHECK-LABEL: define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #4
// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) {
  return vqdmull_s16(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #4
// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) {
  return vqdmull_s32(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4
// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4
// CHECK: ret <4 x i32> [[VQDMLAL_V3_I]]
int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlal_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4
// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4
// CHECK: ret <2 x i64> [[VQDMLAL_V3_I]]
int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlal_s32(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4
// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4
// CHECK: ret <4 x i32> [[VQDMLSL_V3_I]]
int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlsl_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4
// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4
// CHECK: ret <2 x i64> [[VQDMLSL_V3_I]]
int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlsl_s32(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vqdmull_high_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VQDMULL_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQDMULL_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMULL_V2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMULL_V_I_I]], <4 x i16> [[VQDMULL_V1_I_I]]) #4
// CHECK: [[VQDMULL_V3_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqdmull_high_s16(int16x8_t a, int16x8_t b) {
  return vqdmull_high_s16(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vqdmull_high_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VQDMULL_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQDMULL_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMULL_V_I_I]], <2 x i32> [[VQDMULL_V1_I_I]]) #4
// CHECK: [[VQDMULL_V3_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqdmull_high_s32(int32x4_t a, int32x4_t b) {
  return vqdmull_high_s32(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqdmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I_I]], <4 x i16> [[VQDMLAL1_I_I]]) #4
// CHECK: [[VQDMLAL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[VQDMLAL_V_I_I]], <4 x i32> [[VQDMLAL2_I_I]]) #4
// CHECK: ret <4 x i32> [[VQDMLAL_V3_I_I]]
int32x4_t test_vqdmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vqdmlal_high_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vqdmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I_I]], <2 x i32> [[VQDMLAL1_I_I]]) #4
// CHECK: [[VQDMLAL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[VQDMLAL_V_I_I]], <2 x i64> [[VQDMLAL2_I_I]]) #4
// CHECK: ret <2 x i64> [[VQDMLAL_V3_I_I]]
int64x2_t test_vqdmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vqdmlal_high_s32(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[VQDMLAL_I_I]], <4 x i16> [[VQDMLAL1_I_I]]) #4
// CHECK: [[VQDMLSL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[VQDMLSL_V_I_I]], <4 x i32> [[VQDMLAL2_I_I]]) #4
// CHECK: ret <4 x i32> [[VQDMLSL_V3_I_I]]
int32x4_t test_vqdmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vqdmlsl_high_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VQDMLAL_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMLAL1_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[VQDMLAL_I_I]], <2 x i32> [[VQDMLAL1_I_I]]) #4
// CHECK: [[VQDMLSL_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[VQDMLSL_V_I_I]], <2 x i64> [[VQDMLAL2_I_I]]) #4
// CHECK: ret <2 x i64> [[VQDMLSL_V3_I_I]]
int64x2_t test_vqdmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vqdmlsl_high_s32(a, b, c);
}

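// vmull_p8: polynomial multiply long, lowered to pmull.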
// CHECK-LABEL: define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i16> [[VMULL_I]]
poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) {
  return vmull_p8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vmull_high_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) #4
// CHECK: ret <8 x i16> [[VMULL_I_I]]
poly16x8_t test_vmull_high_p8(poly8x16_t a, poly8x16_t b) {
  return vmull_high_p8(a, b);
}

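// Scalar arithmetic. Plain 64-bit add/sub lower to ordinary IR add/sub. The
// saturating i8/i16 forms have no scalar intrinsic, so the operands are
// inserted into vectors at lane 0, the vector intrinsic is called, and lane 0
// is extracted again; the i32/i64 forms call scalar intrinsics directly.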
// CHECK-LABEL: define i64 @test_vaddd_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VADDD_I:%.*]] = add i64 %a, %b
// CHECK: ret i64 [[VADDD_I]]
int64_t test_vaddd_s64(int64_t a, int64_t b) {
  return vaddd_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vaddd_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VADDD_I:%.*]] = add i64 %a, %b
// CHECK: ret i64 [[VADDD_I]]
uint64_t test_vaddd_u64(uint64_t a, uint64_t b) {
  return vaddd_u64(a, b);
}

// CHECK-LABEL: define i64 @test_vsubd_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VSUBD_I:%.*]] = sub i64 %a, %b
// CHECK: ret i64 [[VSUBD_I]]
int64_t test_vsubd_s64(int64_t a, int64_t b) {
  return vsubd_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vsubd_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VSUBD_I:%.*]] = sub i64 %a, %b
// CHECK: ret i64 [[VSUBD_I]]
uint64_t test_vsubd_u64(uint64_t a, uint64_t b) {
  return vsubd_u64(a, b);
}

// CHECK-LABEL: define i8 @test_vqaddb_s8(i8 %a, i8 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_S8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
int8_t test_vqaddb_s8(int8_t a, int8_t b) {
  return vqaddb_s8(a, b);
}

// CHECK-LABEL: define i16 @test_vqaddh_s16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vqaddh_s16(int16_t a, int16_t b) {
  return vqaddh_s16(a, b);
}

// CHECK-LABEL: define i32 @test_vqadds_s32(i32 %a, i32 %b) #0 {
// CHECK: [[VQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQADDS_S32_I]]
int32_t test_vqadds_s32(int32_t a, int32_t b) {
  return vqadds_s32(a, b);
}

// CHECK-LABEL: define i64 @test_vqaddd_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VQADDD_S64_I]]
int64_t test_vqaddd_s64(int64_t a, int64_t b) {
  return vqaddd_s64(a, b);
}

// CHECK-LABEL: define i8 @test_vqaddb_u8(i8 %a, i8 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_U8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
uint8_t test_vqaddb_u8(uint8_t a, uint8_t b) {
  return vqaddb_u8(a, b);
}

// CHECK-LABEL: define i16 @test_vqaddh_u16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_U16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
uint16_t test_vqaddh_u16(uint16_t a, uint16_t b) {
  return vqaddh_u16(a, b);
}

// CHECK-LABEL: define i32 @test_vqadds_u32(i32 %a, i32 %b) #0 {
// CHECK: [[VQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqadd.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQADDS_U32_I]]
uint32_t test_vqadds_u32(uint32_t a, uint32_t b) {
  return vqadds_u32(a, b);
}

// CHECK-LABEL: define i64 @test_vqaddd_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqadd.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VQADDD_U64_I]]
uint64_t test_vqaddd_u64(uint64_t a, uint64_t b) {
  return vqaddd_u64(a, b);
}

// CHECK-LABEL: define i8 @test_vqsubb_s8(i8 %a, i8 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQSUBB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_S8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
int8_t test_vqsubb_s8(int8_t a, int8_t b) {
  return vqsubb_s8(a, b);
}

// CHECK-LABEL: define i16 @test_vqsubh_s16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQSUBH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vqsubh_s16(int16_t a, int16_t b) {
  return vqsubh_s16(a, b);
}

// CHECK-LABEL: define i32 @test_vqsubs_s32(i32 %a, i32 %b) #0 {
// CHECK: [[VQSUBS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQSUBS_S32_I]]
int32_t test_vqsubs_s32(int32_t a, int32_t b) {
  return vqsubs_s32(a, b);
}

// CHECK-LABEL: define i64 @test_vqsubd_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VQSUBD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VQSUBD_S64_I]]
int64_t test_vqsubd_s64(int64_t a, int64_t b) {
  return vqsubd_s64(a, b);
}

// CHECK-LABEL: define i8 @test_vqsubb_u8(i8 %a, i8 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQSUBB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_U8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
uint8_t test_vqsubb_u8(uint8_t a, uint8_t b) {
  return vqsubb_u8(a, b);
}

// CHECK-LABEL: define i16 @test_vqsubh_u16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQSUBH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_U16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
uint16_t test_vqsubh_u16(uint16_t a, uint16_t b) {
  return vqsubh_u16(a, b);
}

// CHECK-LABEL: define i32 @test_vqsubs_u32(i32 %a, i32 %b) #0 {
// CHECK: [[VQSUBS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqsub.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQSUBS_U32_I]]
uint32_t test_vqsubs_u32(uint32_t a, uint32_t b) {
  return vqsubs_u32(a, b);
}

// CHECK-LABEL: define i64 @test_vqsubd_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VQSUBD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqsub.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VQSUBD_U64_I]]
uint64_t test_vqsubd_u64(uint64_t a, uint64_t b) {
  return vqsubd_u64(a, b);
}

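// Scalar shifts: vshld (plain), vqshl (saturating), vrshl (rounding), and
// vqrshl (saturating rounding); the i8/i16 variants use the same lane-0 trick.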
%b) #0 { 9404 // CHECK: [[VQSUBD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 %b) #4 9405 // CHECK: ret i64 [[VQSUBD_S64_I]] 9406 int64_t test_vqsubd_s64(int64_t a, int64_t b) { 9407 return vqsubd_s64(a, b); 9408 } 9409 9410 // CHECK-LABEL: define i8 @test_vqsubb_u8(i8 %a, i8 %b) #0 { 9411 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0 9412 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0 9413 // CHECK: [[VQSUBB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4 9414 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_U8_I]], i64 0 9415 // CHECK: ret i8 [[TMP2]] 9416 uint8_t test_vqsubb_u8(uint8_t a, uint8_t b) { 9417 return vqsubb_u8(a, b); 9418 } 9419 9420 // CHECK-LABEL: define i16 @test_vqsubh_u16(i16 %a, i16 %b) #0 { 9421 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 9422 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 9423 // CHECK: [[VQSUBH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4 9424 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_U16_I]], i64 0 9425 // CHECK: ret i16 [[TMP2]] 9426 uint16_t test_vqsubh_u16(uint16_t a, uint16_t b) { 9427 return vqsubh_u16(a, b); 9428 } 9429 9430 // CHECK-LABEL: define i32 @test_vqsubs_u32(i32 %a, i32 %b) #0 { 9431 // CHECK: [[VQSUBS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqsub.i32(i32 %a, i32 %b) #4 9432 // CHECK: ret i32 [[VQSUBS_U32_I]] 9433 uint32_t test_vqsubs_u32(uint32_t a, uint32_t b) { 9434 return vqsubs_u32(a, b); 9435 } 9436 9437 // CHECK-LABEL: define i64 @test_vqsubd_u64(i64 %a, i64 %b) #0 { 9438 // CHECK: [[VQSUBD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqsub.i64(i64 %a, i64 %b) #4 9439 // CHECK: ret i64 [[VQSUBD_U64_I]] 9440 uint64_t test_vqsubd_u64(uint64_t a, uint64_t b) { 9441 return vqsubd_u64(a, b); 9442 } 9443 9444 // CHECK-LABEL: define i64 @test_vshld_s64(i64 %a, i64 %b) #0 { 9445 // CHECK: [[VSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 %a, i64 %b) #4 9446 // CHECK: ret i64 [[VSHLD_S64_I]] 9447 int64_t test_vshld_s64(int64_t a, int64_t b) { 9448 return vshld_s64(a, b); 9449 } 9450 9451 // CHECK-LABEL: define i64 @test_vshld_u64(i64 %a, i64 %b) #0 { 9452 // CHECK: [[VSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 %a, i64 %b) #4 9453 // CHECK: ret i64 [[VSHLD_U64_I]] 9454 uint64_t test_vshld_u64(uint64_t a, uint64_t b) { 9455 return vshld_u64(a, b); 9456 } 9457 9458 // CHECK-LABEL: define i8 @test_vqshlb_s8(i8 %a, i8 %b) #0 { 9459 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0 9460 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0 9461 // CHECK: [[VQSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4 9462 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_S8_I]], i64 0 9463 // CHECK: ret i8 [[TMP2]] 9464 int8_t test_vqshlb_s8(int8_t a, int8_t b) { 9465 return vqshlb_s8(a, b); 9466 } 9467 9468 // CHECK-LABEL: define i16 @test_vqshlh_s16(i16 %a, i16 %b) #0 { 9469 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 9470 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 9471 // CHECK: [[VQSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4 9472 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_S16_I]], i64 0 9473 // CHECK: ret i16 [[TMP2]] 9474 int16_t test_vqshlh_s16(int16_t 
int16_t test_vqshlh_s16(int16_t a, int16_t b) {
  return vqshlh_s16(a, b);
}

// CHECK-LABEL: define i32 @test_vqshls_s32(i32 %a, i32 %b) #0 {
// CHECK: [[VQSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQSHLS_S32_I]]
int32_t test_vqshls_s32(int32_t a, int32_t b) {
  return vqshls_s32(a, b);
}

// CHECK-LABEL: define i64 @test_vqshld_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VQSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VQSHLD_S64_I]]
int64_t test_vqshld_s64(int64_t a, int64_t b) {
  return vqshld_s64(a, b);
}

// CHECK-LABEL: define i8 @test_vqshlb_u8(i8 %a, i8 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_U8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
uint8_t test_vqshlb_u8(uint8_t a, uint8_t b) {
  return vqshlb_u8(a, b);
}

// CHECK-LABEL: define i16 @test_vqshlh_u16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_U16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
uint16_t test_vqshlh_u16(uint16_t a, uint16_t b) {
  return vqshlh_u16(a, b);
}

// CHECK-LABEL: define i32 @test_vqshls_u32(i32 %a, i32 %b) #0 {
// CHECK: [[VQSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQSHLS_U32_I]]
uint32_t test_vqshls_u32(uint32_t a, uint32_t b) {
  return vqshls_u32(a, b);
}

// CHECK-LABEL: define i64 @test_vqshld_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VQSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VQSHLD_U64_I]]
uint64_t test_vqshld_u64(uint64_t a, uint64_t b) {
  return vqshld_u64(a, b);
}

// CHECK-LABEL: define i64 @test_vrshld_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VRSHLD_S64_I]]
int64_t test_vrshld_s64(int64_t a, int64_t b) {
  return vrshld_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vrshld_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VRSHLD_U64_I]]
uint64_t test_vrshld_u64(uint64_t a, uint64_t b) {
  return vrshld_u64(a, b);
}

// CHECK-LABEL: define i8 @test_vqrshlb_s8(i8 %a, i8 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQRSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_S8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
int8_t test_vqrshlb_s8(int8_t a, int8_t b) {
  return vqrshlb_s8(a, b);
}

// CHECK-LABEL: define i16 @test_vqrshlh_s16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQRSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vqrshlh_s16(int16_t a, int16_t b) {
  return vqrshlh_s16(a, b);
}

// CHECK-LABEL: define i32 @test_vqrshls_s32(i32 %a, i32 %b) #0 {
// CHECK: [[VQRSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrshl.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQRSHLS_S32_I]]
int32_t test_vqrshls_s32(int32_t a, int32_t b) {
  return vqrshls_s32(a, b);
}

// CHECK-LABEL: define i64 @test_vqrshld_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VQRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VQRSHLD_S64_I]]
int64_t test_vqrshld_s64(int64_t a, int64_t b) {
  return vqrshld_s64(a, b);
}

// CHECK-LABEL: define i8 @test_vqrshlb_u8(i8 %a, i8 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VQRSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_U8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
uint8_t test_vqrshlb_u8(uint8_t a, uint8_t b) {
  return vqrshlb_u8(a, b);
}

// CHECK-LABEL: define i16 @test_vqrshlh_u16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQRSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_U16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
uint16_t test_vqrshlh_u16(uint16_t a, uint16_t b) {
  return vqrshlh_u16(a, b);
}

// CHECK-LABEL: define i32 @test_vqrshls_u32(i32 %a, i32 %b) #0 {
// CHECK: [[VQRSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqrshl.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQRSHLS_U32_I]]
uint32_t test_vqrshls_u32(uint32_t a, uint32_t b) {
  return vqrshls_u32(a, b);
}

// CHECK-LABEL: define i64 @test_vqrshld_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VQRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VQRSHLD_U64_I]]
uint64_t test_vqrshld_u64(uint64_t a, uint64_t b) {
  return vqrshld_u64(a, b);
}

// CHECK-LABEL: define i64 @test_vpaddd_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VPADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[TMP1]]) #4
// CHECK: ret i64 [[VPADDD_S64_I]]
int64_t test_vpaddd_s64(int64x2_t a) {
  return vpaddd_s64(a);
}

// CHECK-LABEL: define float @test_vpadds_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[LANE0_I:%.*]] = extractelement <2 x float> [[TMP1]], i64 0
// CHECK: [[LANE1_I:%.*]] = extractelement <2 x float> [[TMP1]], i64 1
// CHECK: [[VPADDD_I:%.*]] = fadd float [[LANE0_I]], [[LANE1_I]]
// CHECK: ret float [[VPADDD_I]]
float32_t test_vpadds_f32(float32x2_t a) {
  return vpadds_f32(a);
}

// CHECK-LABEL: define double @test_vpaddd_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[LANE0_I:%.*]] = extractelement <2 x double> [[TMP1]], i64 0
// CHECK: [[LANE1_I:%.*]] = extractelement <2 x double> [[TMP1]], i64 1
// CHECK: [[VPADDD_I:%.*]] = fadd double [[LANE0_I]], [[LANE1_I]]
// CHECK: ret double [[VPADDD_I]]
float64_t test_vpaddd_f64(float64x2_t a) {
  return vpaddd_f64(a);
}

// CHECK-LABEL: define float @test_vpmaxnms_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VPMAXNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK: ret float [[VPMAXNMS_F32_I]]
float32_t test_vpmaxnms_f32(float32x2_t a) {
  return vpmaxnms_f32(a);
}

// CHECK-LABEL: define double @test_vpmaxnmqd_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VPMAXNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK: ret double [[VPMAXNMQD_F64_I]]
float64_t test_vpmaxnmqd_f64(float64x2_t a) {
  return vpmaxnmqd_f64(a);
}

// CHECK-LABEL: define float @test_vpmaxs_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VPMAXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK: ret float [[VPMAXS_F32_I]]
float32_t test_vpmaxs_f32(float32x2_t a) {
  return vpmaxs_f32(a);
}

// CHECK-LABEL: define double @test_vpmaxqd_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VPMAXQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK: ret double [[VPMAXQD_F64_I]]
float64_t test_vpmaxqd_f64(float64x2_t a) {
  return vpmaxqd_f64(a);
}

// CHECK-LABEL: define float @test_vpminnms_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VPMINNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK: ret float [[VPMINNMS_F32_I]]
float32_t test_vpminnms_f32(float32x2_t a) {
  return vpminnms_f32(a);
}

// CHECK-LABEL: define double @test_vpminnmqd_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VPMINNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK: ret double [[VPMINNMQD_F64_I]]
float64_t test_vpminnmqd_f64(float64x2_t a) {
  return vpminnmqd_f64(a);
}

// CHECK-LABEL: define float @test_vpmins_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VPMINS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK: ret float [[VPMINS_F32_I]]
float32_t test_vpmins_f32(float32x2_t a) {
  return vpmins_f32(a);
}

// CHECK-LABEL: define double @test_vpminqd_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VPMINQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK: ret double [[VPMINQD_F64_I]]
float64_t test_vpminqd_f64(float64x2_t a) {
  return vpminqd_f64(a);
}

// CHECK-LABEL: define i16 @test_vqdmulhh_s16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vqdmulhh_s16(int16_t a, int16_t b) {
  return vqdmulhh_s16(a, b);
}

// CHECK-LABEL: define i32 @test_vqdmulhs_s32(i32 %a, i32 %b) #0 {
// CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQDMULHS_S32_I]]
int32_t test_vqdmulhs_s32(int32_t a, int32_t b) {
  return vqdmulhs_s32(a, b);
}

// CHECK-LABEL: define i16 @test_vqrdmulhh_s16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vqrdmulhh_s16(int16_t a, int16_t b) {
  return vqrdmulhh_s16(a, b);
}

// CHECK-LABEL: define i32 @test_vqrdmulhs_s32(i32 %a, i32 %b) #0 {
// CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VQRDMULHS_S32_I]]
int32_t test_vqrdmulhs_s32(int32_t a, int32_t b) {
  return vqrdmulhs_s32(a, b);
}

// CHECK-LABEL: define float @test_vmulxs_f32(float %a, float %b) #0 {
// CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b) #4
// CHECK: ret float [[VMULXS_F32_I]]
float32_t test_vmulxs_f32(float32_t a, float32_t b) {
  return vmulxs_f32(a, b);
}

// CHECK-LABEL: define double @test_vmulxd_f64(double %a, double %b) #0 {
// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b) #4
// CHECK: ret double [[VMULXD_F64_I]]
float64_t test_vmulxd_f64(float64_t a, float64_t b) {
  return vmulxd_f64(a, b);
}

// CHECK-LABEL: define <1 x double> @test_vmulx_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VMULX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VMULX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK: [[VMULX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmulx.v1f64(<1 x double> [[VMULX_I]], <1 x double> [[VMULX1_I]]) #4
// CHECK: ret <1 x double> [[VMULX2_I]]
float64x1_t test_vmulx_f64(float64x1_t a, float64x1_t b) {
  return vmulx_f64(a, b);
}

// CHECK-LABEL: define float @test_vrecpss_f32(float %a, float %b) #0 {
// CHECK: [[VRECPS_I:%.*]] = call float @llvm.aarch64.neon.frecps.f32(float %a, float %b) #4
// CHECK: ret float [[VRECPS_I]]
float32_t test_vrecpss_f32(float32_t a, float32_t b) {
  return vrecpss_f32(a, b);
}

// CHECK-LABEL: define double @test_vrecpsd_f64(double %a, double %b) #0 {
// CHECK: [[VRECPS_I:%.*]] = call double @llvm.aarch64.neon.frecps.f64(double %a, double %b) #4
// CHECK: ret double [[VRECPS_I]]
float64_t test_vrecpsd_f64(float64_t a, float64_t b) {
  return vrecpsd_f64(a, b);
}

// CHECK-LABEL: define float @test_vrsqrtss_f32(float %a, float %b) #0 {
// CHECK: [[VRSQRTSS_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrts.f32(float %a, float %b) #4
// CHECK: ret float [[VRSQRTSS_F32_I]]
float32_t test_vrsqrtss_f32(float32_t a, float32_t b) {
  return vrsqrtss_f32(a, b);
}

// CHECK-LABEL: define double @test_vrsqrtsd_f64(double %a, double %b) #0 {
// CHECK: [[VRSQRTSD_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrts.f64(double %a, double %b) #4
// CHECK: ret double [[VRSQRTSD_F64_I]]
float64_t test_vrsqrtsd_f64(float64_t a, float64_t b) {
  return vrsqrtsd_f64(a, b);
}

// CHECK-LABEL: define float @test_vcvts_f32_s32(i32 %a) #0 {
// CHECK: [[TMP0:%.*]] = sitofp i32 %a to float
// CHECK: ret float [[TMP0]]
float32_t test_vcvts_f32_s32(int32_t a) {
  return vcvts_f32_s32(a);
}

// CHECK-LABEL: define double @test_vcvtd_f64_s64(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = sitofp i64 %a to double
// CHECK: ret double [[TMP0]]
float64_t test_vcvtd_f64_s64(int64_t a) {
  return vcvtd_f64_s64(a);
}

// CHECK-LABEL: define float @test_vcvts_f32_u32(i32 %a) #0 {
// CHECK: [[TMP0:%.*]] = uitofp i32 %a to float
// CHECK: ret float [[TMP0]]
float32_t test_vcvts_f32_u32(uint32_t a) {
  return vcvts_f32_u32(a);
}

// CHECK-LABEL: define double @test_vcvtd_f64_u64(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = uitofp i64 %a to double
// CHECK: ret double [[TMP0]]
float64_t test_vcvtd_f64_u64(uint64_t a) {
  return vcvtd_f64_u64(a);
}

// CHECK-LABEL: define float @test_vrecpes_f32(float %a) #0 {
// CHECK: [[VRECPES_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpe.f32(float %a) #4
// CHECK: ret float [[VRECPES_F32_I]]
float32_t test_vrecpes_f32(float32_t a) {
  return vrecpes_f32(a);
}

// CHECK-LABEL: define double @test_vrecped_f64(double %a) #0 {
// CHECK: [[VRECPED_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpe.f64(double %a) #4
// CHECK: ret double [[VRECPED_F64_I]]
float64_t test_vrecped_f64(float64_t a) {
  return vrecped_f64(a);
}

// CHECK-LABEL: define float @test_vrecpxs_f32(float %a) #0 {
// CHECK: [[VRECPXS_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpx.f32(float %a) #4
// CHECK: ret float [[VRECPXS_F32_I]]
float32_t test_vrecpxs_f32(float32_t a) {
  return vrecpxs_f32(a);
}

// CHECK-LABEL: define double @test_vrecpxd_f64(double %a) #0 {
// CHECK: [[VRECPXD_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpx.f64(double %a) #4
// CHECK: ret double [[VRECPXD_F64_I]]
float64_t test_vrecpxd_f64(float64_t a) {
  return vrecpxd_f64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vrsqrte_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> [[VRSQRTE_V_I]]) #4
// CHECK: ret <2 x i32> [[VRSQRTE_V1_I]]
uint32x2_t test_vrsqrte_u32(uint32x2_t a) {
  return vrsqrte_u32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vrsqrteq_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSQRTEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> [[VRSQRTEQ_V_I]]) #4
// CHECK: ret <4 x i32> [[VRSQRTEQ_V1_I]]
uint32x4_t test_vrsqrteq_u32(uint32x4_t a) {
  return vrsqrteq_u32(a);
}

// CHECK-LABEL: define float @test_vrsqrtes_f32(float %a) #0 {
// CHECK: [[VRSQRTES_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrte.f32(float %a) #4
// CHECK: ret float [[VRSQRTES_F32_I]]
float32_t test_vrsqrtes_f32(float32_t a) {
  return vrsqrtes_f32(a);
}

// CHECK-LABEL: define double @test_vrsqrted_f64(double %a) #0 {
// CHECK: [[VRSQRTED_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrte.f64(double %a) #4
// CHECK: ret double [[VRSQRTED_F64_I]]
float64_t test_vrsqrted_f64(float64_t a) {
  return vrsqrted_f64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vld1q_u8(i8* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
// CHECK: ret <16 x i8> [[TMP1]]
uint8x16_t test_vld1q_u8(uint8_t const *a) {
  return vld1q_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_u16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vld1q_u16(uint16_t const *a) {
  return vld1q_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vld1q_u32(i32* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]]
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vld1q_u32(uint32_t const *a) {
  return vld1q_u32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vld1q_u64(i64* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vld1q_u64(uint64_t const *a) {
  return vld1q_u64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vld1q_s8(i8* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
// CHECK: ret <16 x i8> [[TMP1]]
int8x16_t test_vld1q_s8(int8_t const *a) {
  return vld1q_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_s16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vld1q_s16(int16_t const *a) {
  return vld1q_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vld1q_s32(i32* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]]
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vld1q_s32(int32_t const *a) {
  return vld1q_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vld1q_s64(i64* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vld1q_s64(int64_t const *a) {
  return vld1q_s64(a);
}

// CHECK-LABEL: define <8 x half> @test_vld1q_f16(half* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
// CHECK: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <8 x half>
// CHECK: ret <8 x half> [[TMP3]]
float16x8_t test_vld1q_f16(float16_t const *a) {
  return vld1q_f16(a);
}

// CHECK-LABEL: define <4 x float> @test_vld1q_f32(float* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
// CHECK: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]]
// CHECK: ret <4 x float> [[TMP2]]
float32x4_t test_vld1q_f32(float32_t const *a) {
  return vld1q_f32(a);
}

// CHECK-LABEL: define <2 x double> @test_vld1q_f64(double* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x double>*
// CHECK: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]]
// CHECK: ret <2 x double> [[TMP2]]
float64x2_t test_vld1q_f64(float64_t const *a) {
  return vld1q_f64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vld1q_p8(i8* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]]
// CHECK: ret <16 x i8> [[TMP1]]
poly8x16_t test_vld1q_p8(poly8_t const *a) {
  return vld1q_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_p16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]]
// CHECK: ret <8 x i16> [[TMP2]]
poly16x8_t test_vld1q_p16(poly16_t const *a) {
  return vld1q_p16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_u8(i8* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
// CHECK: ret <8 x i8> [[TMP1]]
uint8x8_t test_vld1_u8(uint8_t const *a) {
  return vld1_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_u16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vld1_u16(uint16_t const *a) {
  return vld1_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vld1_u32(i32* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]]
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vld1_u32(uint32_t const *a) {
  return vld1_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vld1_u64(i64* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
// CHECK: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vld1_u64(uint64_t const *a) {
  return vld1_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_s8(i8* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
// CHECK: ret <8 x i8> [[TMP1]]
int8x8_t test_vld1_s8(int8_t const *a) {
  return vld1_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_s16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vld1_s16(int16_t const *a) {
  return vld1_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vld1_s32(i32* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]]
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vld1_s32(int32_t const *a) {
  return vld1_s32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vld1_s64(i64* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
// CHECK: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vld1_s64(int64_t const *a) {
  return vld1_s64(a);
}

// CHECK-LABEL: define <4 x half> @test_vld1_f16(half* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <4 x half>
// CHECK: ret <4 x half> [[TMP3]]
float16x4_t test_vld1_f16(float16_t const *a) {
  return vld1_f16(a);
}

// CHECK-LABEL: define <2 x float> @test_vld1_f32(float* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
// CHECK: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]]
// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vld1_f32(float32_t const *a) {
  return vld1_f32(a);
}
// CHECK-LABEL: define <1 x double> @test_vld1_f64(double* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x double>*
// CHECK: [[TMP2:%.*]] = load <1 x double>, <1 x double>* [[TMP1]]
// CHECK: ret <1 x double> [[TMP2]]
float64x1_t test_vld1_f64(float64_t const *a) {
  return vld1_f64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_p8(i8* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]]
// CHECK: ret <8 x i8> [[TMP1]]
poly8x8_t test_vld1_p8(poly8_t const *a) {
  return vld1_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_p16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]]
// CHECK: ret <4 x i16> [[TMP2]]
poly16x4_t test_vld1_p16(poly16_t const *a) {
  return vld1_p16(a);
}

// CHECK-LABEL: define %struct.uint8x16x2_t @test_vld2q_u8(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.uint8x16x2_t [[TMP5]]
uint8x16x2_t test_vld2q_u8(uint8_t const *a) {
  return vld2q_u8(a);
}

// CHECK-LABEL: define %struct.uint16x8x2_t @test_vld2q_u16(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.uint16x8x2_t [[TMP6]]
uint16x8x2_t test_vld2q_u16(uint16_t const *a) {
  return vld2q_u16(a);
}

// CHECK-LABEL: define %struct.uint32x4x2_t @test_vld2q_u32(i32* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
// CHECK: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.uint32x4x2_t [[TMP6]]
uint32x4x2_t test_vld2q_u32(uint32_t const *a) {
  return vld2q_u32(a);
}

// CHECK-LABEL: define %struct.uint64x2x2_t @test_vld2q_u64(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
// CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.uint64x2x2_t [[TMP6]]
uint64x2x2_t test_vld2q_u64(uint64_t const *a) {
  return vld2q_u64(a);
}

// CHECK-LABEL: define %struct.int8x16x2_t @test_vld2q_s8(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.int8x16x2_t [[TMP5]]
int8x16x2_t test_vld2q_s8(int8_t const *a) {
  return vld2q_s8(a);
}

// CHECK-LABEL: define %struct.int16x8x2_t @test_vld2q_s16(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.int16x8x2_t [[TMP6]]
int16x8x2_t test_vld2q_s16(int16_t const *a) {
  return vld2q_s16(a);
}

// CHECK-LABEL: define %struct.int32x4x2_t @test_vld2q_s32(i32* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
// CHECK: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.int32x4x2_t [[TMP6]]
int32x4x2_t test_vld2q_s32(int32_t const *a) {
  return vld2q_s32(a);
}

// CHECK-LABEL: define %struct.int64x2x2_t @test_vld2q_s64(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
// CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.int64x2x2_t [[TMP6]]
int64x2x2_t test_vld2q_s64(int64_t const *a) {
  return vld2q_s64(a);
}

// CHECK-LABEL: define %struct.float16x8x2_t @test_vld2q_f16(half* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.float16x8x2_t [[TMP6]]
float16x8x2_t test_vld2q_f16(float16_t const *a) {
  return vld2q_f16(a);
}

// CHECK-LABEL: define %struct.float32x4x2_t @test_vld2q_f32(float* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
// CHECK: [[VLD2:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float> }*
// CHECK: store { <4 x float>, <4 x float> } [[VLD2]], { <4 x float>, <4 x float> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.float32x4x2_t [[TMP6]]
float32x4x2_t test_vld2q_f32(float32_t const *a) {
  return vld2q_f32(a);
}

// CHECK-LABEL: define %struct.float64x2x2_t @test_vld2q_f64(double* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
// CHECK: [[VLD2:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0v2f64(<2 x double>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double> }*
// CHECK: store { <2 x double>, <2 x double> } [[VLD2]], { <2 x double>, <2 x double> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.float64x2x2_t [[TMP6]]
float64x2x2_t test_vld2q_f64(float64_t const *a) {
  return vld2q_f64(a);
}

// CHECK-LABEL: define %struct.poly8x16x2_t @test_vld2q_p8(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.poly8x16x2_t [[TMP5]]
poly8x16x2_t test_vld2q_p8(poly8_t const *a) {
  return vld2q_p8(a);
}

// CHECK-LABEL: define %struct.poly16x8x2_t @test_vld2q_p16(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.poly16x8x2_t [[TMP6]]
poly16x8x2_t test_vld2q_p16(poly16_t const *a) {
  return vld2q_p16(a);
}

// CHECK-LABEL: define %struct.uint8x8x2_t @test_vld2_u8(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.uint8x8x2_t [[TMP5]]
uint8x8x2_t test_vld2_u8(uint8_t const *a) {
  return vld2_u8(a);
}

// CHECK-LABEL: define %struct.uint16x4x2_t @test_vld2_u16(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.uint16x4x2_t [[TMP6]]
uint16x4x2_t test_vld2_u16(uint16_t const *a) {
  return vld2_u16(a);
}

// CHECK-LABEL: define %struct.uint32x2x2_t @test_vld2_u32(i32* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
// CHECK: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.uint32x2x2_t [[TMP6]]
uint32x2x2_t test_vld2_u32(uint32_t const *a) {
  return vld2_u32(a);
}

// CHECK-LABEL: define %struct.uint64x1x2_t @test_vld2_u64(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.uint64x1x2_t [[TMP6]]
uint64x1x2_t test_vld2_u64(uint64_t const *a) {
  return vld2_u64(a);
}

// CHECK-LABEL: define %struct.int8x8x2_t @test_vld2_s8(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.int8x8x2_t [[TMP5]]
int8x8x2_t test_vld2_s8(int8_t const *a) {
  return vld2_s8(a);
}

// CHECK-LABEL: define %struct.int16x4x2_t @test_vld2_s16(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.int16x4x2_t [[TMP6]]
int16x4x2_t test_vld2_s16(int16_t const *a) {
  return vld2_s16(a);
}

// CHECK-LABEL: define %struct.int32x2x2_t @test_vld2_s32(i32* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
// CHECK: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.int32x2x2_t [[TMP6]]
int32x2x2_t test_vld2_s32(int32_t const *a) {
  return vld2_s32(a);
}

// CHECK-LABEL: define %struct.int64x1x2_t @test_vld2_s64(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.int64x1x2_t [[TMP6]]
int64x1x2_t test_vld2_s64(int64_t const *a) {
  return vld2_s64(a);
}

// CHECK-LABEL: define %struct.float16x4x2_t @test_vld2_f16(half* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.float16x4x2_t [[TMP6]]
float16x4x2_t test_vld2_f16(float16_t const *a) {
  return vld2_f16(a);
}

// CHECK-LABEL: define %struct.float32x2x2_t @test_vld2_f32(float* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
// CHECK: [[VLD2:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0v2f32(<2 x float>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float> } [[VLD2]], { <2 x float>, <2 x float> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.float32x2x2_t [[TMP6]]
float32x2x2_t test_vld2_f32(float32_t const *a) {
  return vld2_f32(a);
}

// CHECK-LABEL: define %struct.float64x1x2_t @test_vld2_f64(double* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
// CHECK: [[VLD2:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0v1f64(<1 x double>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double> }*
// CHECK: store { <1 x double>, <1 x double> } [[VLD2]], { <1 x double>, <1 x double> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.float64x1x2_t [[TMP6]]
float64x1x2_t test_vld2_f64(float64_t const *a) {
  return vld2_f64(a);
}

// CHECK-LABEL: define %struct.poly8x8x2_t @test_vld2_p8(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.poly8x8x2_t [[TMP5]]
poly8x8x2_t test_vld2_p8(poly8_t const *a) {
  return vld2_p8(a);
}

// CHECK-LABEL: define %struct.poly16x4x2_t @test_vld2_p16(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]]
[[RETVAL]] to i8* 10571 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* 10572 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) 10573 // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8 10574 // CHECK: ret %struct.poly16x4x2_t [[TMP6]] 10575 poly16x4x2_t test_vld2_p16(poly16_t const *a) { 10576 return vld2_p16(a); 10577 } 10578 10579 // CHECK-LABEL: define %struct.uint8x16x3_t @test_vld3q_u8(i8* %a) #0 { 10580 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16 10581 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16 10582 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8* 10583 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>* 10584 // CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]]) 10585 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }* 10586 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]] 10587 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8* 10588 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8* 10589 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 48, i32 16, i1 false) 10590 // CHECK: [[TMP5:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16 10591 // CHECK: ret %struct.uint8x16x3_t [[TMP5]] 10592 uint8x16x3_t test_vld3q_u8(uint8_t const *a) { 10593 return vld3q_u8(a); 10594 } 10595 10596 // CHECK-LABEL: define %struct.uint16x8x3_t @test_vld3q_u16(i16* %a) #0 { 10597 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16 10598 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16 10599 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* 10600 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 10601 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>* 10602 // CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]]) 10603 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }* 10604 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] 10605 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8* 10606 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* 10607 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) 10608 // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16 10609 // CHECK: ret %struct.uint16x8x3_t [[TMP6]] 10610 uint16x8x3_t test_vld3q_u16(uint16_t const *a) { 10611 return vld3q_u16(a); 10612 } 10613 10614 // CHECK-LABEL: define %struct.uint32x4x3_t @test_vld3q_u32(i32* %a) #0 { 10615 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16 10616 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16 10617 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* 10618 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 10619 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>* 10620 // CHECK: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0v4i32(<4 x i32>* [[TMP2]]) 10621 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }* 
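// Editorial note (plain comment, not matched by FileCheck): the ld3 intrinsic
// returns a literal struct of three vectors. It is stored into the __ret
// temporary through the bitcast pointer checked just above, copied into the
// sret slot by memcpy, and reloaded for the aggregate return; every vld3 and
// vld3q variant below appears to follow this same sequence.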
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.uint32x4x3_t [[TMP6]]
uint32x4x3_t test_vld3q_u32(uint32_t const *a) {
  return vld3q_u32(a);
}

// CHECK-LABEL: define %struct.uint64x2x3_t @test_vld3q_u64(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.uint64x2x3_t [[TMP6]]
uint64x2x3_t test_vld3q_u64(uint64_t const *a) {
  return vld3q_u64(a);
}

// CHECK-LABEL: define %struct.int8x16x3_t @test_vld3q_s8(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 48, i32 16, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.int8x16x3_t [[TMP5]]
int8x16x3_t test_vld3q_s8(int8_t const *a) {
  return vld3q_s8(a);
}

// CHECK-LABEL: define %struct.int16x8x3_t @test_vld3q_s16(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.int16x8x3_t [[TMP6]]
int16x8x3_t test_vld3q_s16(int16_t const *a) {
  return vld3q_s16(a);
}

// CHECK-LABEL: define %struct.int32x4x3_t @test_vld3q_s32(i32* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>*
// CHECK: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0v4i32(<4 x i32>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.int32x4x3_t [[TMP6]]
int32x4x3_t test_vld3q_s32(int32_t const *a) {
  return vld3q_s32(a);
}

// CHECK-LABEL: define %struct.int64x2x3_t @test_vld3q_s64(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.int64x2x3_t [[TMP6]]
int64x2x3_t test_vld3q_s64(int64_t const *a) {
  return vld3q_s64(a);
}

// CHECK-LABEL: define %struct.float16x8x3_t @test_vld3q_f16(half* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.float16x8x3_t [[TMP6]]
float16x8x3_t test_vld3q_f16(float16_t const *a) {
  return vld3q_f16(a);
}

// CHECK-LABEL: define %struct.float32x4x3_t @test_vld3q_f32(float* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>*
// CHECK: [[VLD3:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0v4f32(<4 x float>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float> }*
// CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.float32x4x3_t [[TMP6]]
float32x4x3_t test_vld3q_f32(float32_t const *a) {
  return vld3q_f32(a);
}

// CHECK-LABEL: define %struct.float64x2x3_t @test_vld3q_f64(double* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>*
// CHECK: [[VLD3:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0v2f64(<2 x double>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double> }*
// CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.float64x2x3_t [[TMP6]]
float64x2x3_t test_vld3q_f64(float64_t const *a) {
  return vld3q_f64(a);
}

// CHECK-LABEL: define %struct.poly8x16x3_t @test_vld3q_p8(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 48, i32 16, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.poly8x16x3_t [[TMP5]]
poly8x16x3_t test_vld3q_p8(poly8_t const *a) {
  return vld3q_p8(a);
}

// CHECK-LABEL: define %struct.poly16x8x3_t @test_vld3q_p16(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.poly16x8x3_t [[TMP6]]
poly16x8x3_t test_vld3q_p16(poly16_t const *a) {
  return vld3q_p16(a);
}

// CHECK-LABEL: define %struct.uint8x8x3_t @test_vld3_u8(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 24, i32 8, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8
// CHECK: ret %struct.uint8x8x3_t [[TMP5]]
uint8x8x3_t test_vld3_u8(uint8_t const *a) {
  return vld3_u8(a);
}

// CHECK-LABEL: define %struct.uint16x4x3_t @test_vld3_u16(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8
// CHECK: ret %struct.uint16x4x3_t [[TMP6]]
uint16x4x3_t test_vld3_u16(uint16_t const *a) {
  return vld3_u16(a);
}

// CHECK-LABEL: define %struct.uint32x2x3_t @test_vld3_u32(i32* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
// CHECK: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8
// CHECK: ret %struct.uint32x2x3_t [[TMP6]]
uint32x2x3_t test_vld3_u32(uint32_t const *a) {
  return vld3_u32(a);
}

// CHECK-LABEL: define %struct.uint64x1x3_t @test_vld3_u64(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8*
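// Size note (plain comment, not matched by FileCheck): the 64-bit "d" forms
// copy i64 24, i.e. three 8-byte vectors at 8-byte alignment, while the
// 128-bit "q" forms above copy i64 48 at 16-byte alignment.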
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8
// CHECK: ret %struct.uint64x1x3_t [[TMP6]]
uint64x1x3_t test_vld3_u64(uint64_t const *a) {
  return vld3_u64(a);
}

// CHECK-LABEL: define %struct.int8x8x3_t @test_vld3_s8(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 24, i32 8, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8
// CHECK: ret %struct.int8x8x3_t [[TMP5]]
int8x8x3_t test_vld3_s8(int8_t const *a) {
  return vld3_s8(a);
}

// CHECK-LABEL: define %struct.int16x4x3_t @test_vld3_s16(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8
// CHECK: ret %struct.int16x4x3_t [[TMP6]]
int16x4x3_t test_vld3_s16(int16_t const *a) {
  return vld3_s16(a);
}

// CHECK-LABEL: define %struct.int32x2x3_t @test_vld3_s32(i32* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>*
// CHECK: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0v2i32(<2 x i32>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8
// CHECK: ret %struct.int32x2x3_t [[TMP6]]
int32x2x3_t test_vld3_s32(int32_t const *a) {
  return vld3_s32(a);
}

// CHECK-LABEL: define %struct.int64x1x3_t @test_vld3_s64(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8
// CHECK: ret %struct.int64x1x3_t [[TMP6]]
int64x1x3_t test_vld3_s64(int64_t const *a) {
  return vld3_s64(a);
}

// CHECK-LABEL: define %struct.float16x4x3_t @test_vld3_f16(half* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8
// CHECK: ret %struct.float16x4x3_t [[TMP6]]
float16x4x3_t test_vld3_f16(float16_t const *a) {
  return vld3_f16(a);
}

// CHECK-LABEL: define %struct.float32x2x3_t @test_vld3_f32(float* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
// CHECK: [[VLD3:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0v2f32(<2 x float>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8
// CHECK: ret %struct.float32x2x3_t [[TMP6]]
float32x2x3_t test_vld3_f32(float32_t const *a) {
  return vld3_f32(a);
}

// CHECK-LABEL: define %struct.float64x1x3_t @test_vld3_f64(double* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
// CHECK: [[VLD3:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0v1f64(<1 x double>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double> }*
// CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8
// CHECK: ret %struct.float64x1x3_t [[TMP6]]
float64x1x3_t test_vld3_f64(float64_t const *a) {
  return vld3_f64(a);
}

// CHECK-LABEL: define %struct.poly8x8x3_t @test_vld3_p8(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 24, i32 8, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8
// CHECK: ret %struct.poly8x8x3_t [[TMP5]]
poly8x8x3_t test_vld3_p8(poly8_t const *a) {
  return vld3_p8(a);
}

// CHECK-LABEL: define %struct.poly16x4x3_t @test_vld3_p16(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8
// CHECK: ret %struct.poly16x4x3_t [[TMP6]]
poly16x4x3_t test_vld3_p16(poly16_t const *a) {
  return vld3_p16(a);
}

// CHECK-LABEL: define %struct.uint8x16x4_t @test_vld4q_u8(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]])
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 64, i32 16, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16
// CHECK: ret %struct.uint8x16x4_t [[TMP5]]
uint8x16x4_t test_vld4q_u8(uint8_t const *a) {
  return vld4q_u8(a);
}

// CHECK-LABEL: define %struct.uint16x8x4_t @test_vld4q_u16(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>*
// CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16
// CHECK: ret %struct.uint16x8x4_t [[TMP6]]
uint16x8x4_t test_vld4q_u16(uint16_t const *a) {
  return vld4q_u16(a);
}
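
// Illustrative usage sketch, not part of the original test: vld4q_u16
// deinterleaves 32 consecutive u16 values into the four vectors of a
// uint16x8x4_t and vst4q_u16 interleaves them back, so one "channel" can be
// transformed in place. The helper below is hypothetical; it is static and
// unused, so no CHECK line depends on it.
static void example_double_channel0_u16(uint16_t *buf) {
  uint16x8x4_t v = vld4q_u16(buf);          // v.val[k] holds every 4th element
  v.val[0] = vaddq_u16(v.val[0], v.val[0]); // double channel 0 only
  vst4q_u16(buf, v);                        // reinterleave back into buf
}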
11075 11076 // CHECK-LABEL: define %struct.uint32x4x4_t @test_vld4q_u32(i32* %a) #0 { 11077 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16 11078 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16 11079 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* 11080 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 11081 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>* 11082 // CHECK: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0v4i32(<4 x i32>* [[TMP2]]) 11083 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* 11084 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] 11085 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8* 11086 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* 11087 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 11088 // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16 11089 // CHECK: ret %struct.uint32x4x4_t [[TMP6]] 11090 uint32x4x4_t test_vld4q_u32(uint32_t const *a) { 11091 return vld4q_u32(a); 11092 } 11093 11094 // CHECK-LABEL: define %struct.uint64x2x4_t @test_vld4q_u64(i64* %a) #0 { 11095 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16 11096 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16 11097 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8* 11098 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 11099 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>* 11100 // CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP2]]) 11101 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* 11102 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] 11103 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8* 11104 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8* 11105 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 11106 // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16 11107 // CHECK: ret %struct.uint64x2x4_t [[TMP6]] 11108 uint64x2x4_t test_vld4q_u64(uint64_t const *a) { 11109 return vld4q_u64(a); 11110 } 11111 11112 // CHECK-LABEL: define %struct.int8x16x4_t @test_vld4q_s8(i8* %a) #0 { 11113 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16 11114 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16 11115 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* 11116 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>* 11117 // CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]]) 11118 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* 11119 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]] 11120 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8* 11121 // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* 11122 // CHECK: call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 64, i32 16, i1 false) 11123 // CHECK: [[TMP5:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16 11124 // CHECK: ret %struct.int8x16x4_t [[TMP5]] 11125 int8x16x4_t test_vld4q_s8(int8_t const *a) { 11126 return vld4q_s8(a); 11127 } 11128 11129 // CHECK-LABEL: define %struct.int16x8x4_t @test_vld4q_s16(i16* %a) #0 { 11130 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16 11131 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16 11132 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* 11133 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 11134 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>* 11135 // CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]]) 11136 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* 11137 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] 11138 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8* 11139 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* 11140 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 11141 // CHECK: [[TMP6:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16 11142 // CHECK: ret %struct.int16x8x4_t [[TMP6]] 11143 int16x8x4_t test_vld4q_s16(int16_t const *a) { 11144 return vld4q_s16(a); 11145 } 11146 11147 // CHECK-LABEL: define %struct.int32x4x4_t @test_vld4q_s32(i32* %a) #0 { 11148 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16 11149 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16 11150 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* 11151 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 11152 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i32>* 11153 // CHECK: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0v4i32(<4 x i32>* [[TMP2]]) 11154 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* 11155 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] 11156 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8* 11157 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* 11158 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 11159 // CHECK: [[TMP6:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16 11160 // CHECK: ret %struct.int32x4x4_t [[TMP6]] 11161 int32x4x4_t test_vld4q_s32(int32_t const *a) { 11162 return vld4q_s32(a); 11163 } 11164 11165 // CHECK-LABEL: define %struct.int64x2x4_t @test_vld4q_s64(i64* %a) #0 { 11166 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16 11167 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16 11168 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8* 11169 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 11170 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>* 11171 // CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP2]]) 11172 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, 
<2 x i64>, <2 x i64> }* 11173 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] 11174 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8* 11175 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8* 11176 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 11177 // CHECK: [[TMP6:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16 11178 // CHECK: ret %struct.int64x2x4_t [[TMP6]] 11179 int64x2x4_t test_vld4q_s64(int64_t const *a) { 11180 return vld4q_s64(a); 11181 } 11182 11183 // CHECK-LABEL: define %struct.float16x8x4_t @test_vld4q_f16(half* %a) #0 { 11184 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16 11185 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16 11186 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* 11187 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* 11188 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>* 11189 // CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]]) 11190 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* 11191 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] 11192 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8* 11193 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* 11194 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 11195 // CHECK: [[TMP6:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16 11196 // CHECK: ret %struct.float16x8x4_t [[TMP6]] 11197 float16x8x4_t test_vld4q_f16(float16_t const *a) { 11198 return vld4q_f16(a); 11199 } 11200 11201 // CHECK-LABEL: define %struct.float32x4x4_t @test_vld4q_f32(float* %a) #0 { 11202 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16 11203 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16 11204 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* 11205 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* 11206 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x float>* 11207 // CHECK: [[VLD4:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0v4f32(<4 x float>* [[TMP2]]) 11208 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* 11209 // CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]] 11210 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8* 11211 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* 11212 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 11213 // CHECK: [[TMP6:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16 11214 // CHECK: ret %struct.float32x4x4_t [[TMP6]] 11215 float32x4x4_t test_vld4q_f32(float32_t const *a) { 11216 return vld4q_f32(a); 11217 } 11218 11219 // CHECK-LABEL: define %struct.float64x2x4_t @test_vld4q_f64(double* %a) #0 { 11220 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16 11221 // CHECK: [[__RET:%.*]] = alloca 
%struct.float64x2x4_t, align 16 11222 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8* 11223 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8* 11224 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x double>* 11225 // CHECK: [[VLD4:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0v2f64(<2 x double>* [[TMP2]]) 11226 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* 11227 // CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP3]] 11228 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8* 11229 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8* 11230 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 11231 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16 11232 // CHECK: ret %struct.float64x2x4_t [[TMP6]] 11233 float64x2x4_t test_vld4q_f64(float64_t const *a) { 11234 return vld4q_f64(a); 11235 } 11236 11237 // CHECK-LABEL: define %struct.poly8x16x4_t @test_vld4q_p8(i8* %a) #0 { 11238 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16 11239 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16 11240 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* 11241 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <16 x i8>* 11242 // CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* [[TMP1]]) 11243 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* 11244 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP2]] 11245 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8* 11246 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* 11247 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 64, i32 16, i1 false) 11248 // CHECK: [[TMP5:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16 11249 // CHECK: ret %struct.poly8x16x4_t [[TMP5]] 11250 poly8x16x4_t test_vld4q_p8(poly8_t const *a) { 11251 return vld4q_p8(a); 11252 } 11253 11254 // CHECK-LABEL: define %struct.poly16x8x4_t @test_vld4q_p16(i16* %a) #0 { 11255 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16 11256 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16 11257 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* 11258 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 11259 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i16>* 11260 // CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0v8i16(<8 x i16>* [[TMP2]]) 11261 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* 11262 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] 11263 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8* 11264 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* 11265 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 11266 // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x4_t, 
%struct.poly16x8x4_t* [[RETVAL]], align 16 11267 // CHECK: ret %struct.poly16x8x4_t [[TMP6]] 11268 poly16x8x4_t test_vld4q_p16(poly16_t const *a) { 11269 return vld4q_p16(a); 11270 } 11271 11272 // CHECK-LABEL: define %struct.uint8x8x4_t @test_vld4_u8(i8* %a) #0 { 11273 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8 11274 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8 11275 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* 11276 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>* 11277 // CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]]) 11278 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* 11279 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]] 11280 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8* 11281 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* 11282 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 8, i1 false) 11283 // CHECK: [[TMP5:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8 11284 // CHECK: ret %struct.uint8x8x4_t [[TMP5]] 11285 uint8x8x4_t test_vld4_u8(uint8_t const *a) { 11286 return vld4_u8(a); 11287 } 11288 11289 // CHECK-LABEL: define %struct.uint16x4x4_t @test_vld4_u16(i16* %a) #0 { 11290 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8 11291 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8 11292 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* 11293 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 11294 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>* 11295 // CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]]) 11296 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* 11297 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 11298 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8* 11299 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* 11300 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 11301 // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8 11302 // CHECK: ret %struct.uint16x4x4_t [[TMP6]] 11303 uint16x4x4_t test_vld4_u16(uint16_t const *a) { 11304 return vld4_u16(a); 11305 } 11306 11307 // CHECK-LABEL: define %struct.uint32x2x4_t @test_vld4_u32(i32* %a) #0 { 11308 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8 11309 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8 11310 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* 11311 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 11312 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>* 11313 // CHECK: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0v2i32(<2 x i32>* [[TMP2]]) 11314 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* 11315 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] 11316 // CHECK: [[TMP4:%.*]] = bitcast 
%struct.uint32x2x4_t* [[RETVAL]] to i8* 11317 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* 11318 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 11319 // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8 11320 // CHECK: ret %struct.uint32x2x4_t [[TMP6]] 11321 uint32x2x4_t test_vld4_u32(uint32_t const *a) { 11322 return vld4_u32(a); 11323 } 11324 11325 // CHECK-LABEL: define %struct.uint64x1x4_t @test_vld4_u64(i64* %a) #0 { 11326 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8 11327 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8 11328 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* 11329 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 11330 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>* 11331 // CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]]) 11332 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* 11333 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] 11334 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8* 11335 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* 11336 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 11337 // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8 11338 // CHECK: ret %struct.uint64x1x4_t [[TMP6]] 11339 uint64x1x4_t test_vld4_u64(uint64_t const *a) { 11340 return vld4_u64(a); 11341 } 11342 11343 // CHECK-LABEL: define %struct.int8x8x4_t @test_vld4_s8(i8* %a) #0 { 11344 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8 11345 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8 11346 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* 11347 // CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>* 11348 // CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]]) 11349 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* 11350 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]] 11351 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8* 11352 // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* 11353 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 8, i1 false) 11354 // CHECK: [[TMP5:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8 11355 // CHECK: ret %struct.int8x8x4_t [[TMP5]] 11356 int8x8x4_t test_vld4_s8(int8_t const *a) { 11357 return vld4_s8(a); 11358 } 11359 11360 // CHECK-LABEL: define %struct.int16x4x4_t @test_vld4_s16(i16* %a) #0 { 11361 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8 11362 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8 11363 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* 11364 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 11365 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>* 11366 // CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]]) 11367 // CHECK: [[TMP3:%.*]] = 
bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* 11368 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 11369 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8* 11370 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* 11371 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 11372 // CHECK: [[TMP6:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8 11373 // CHECK: ret %struct.int16x4x4_t [[TMP6]] 11374 int16x4x4_t test_vld4_s16(int16_t const *a) { 11375 return vld4_s16(a); 11376 } 11377 11378 // CHECK-LABEL: define %struct.int32x2x4_t @test_vld4_s32(i32* %a) #0 { 11379 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8 11380 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8 11381 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* 11382 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 11383 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i32>* 11384 // CHECK: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0v2i32(<2 x i32>* [[TMP2]]) 11385 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* 11386 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] 11387 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8* 11388 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* 11389 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 11390 // CHECK: [[TMP6:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8 11391 // CHECK: ret %struct.int32x2x4_t [[TMP6]] 11392 int32x2x4_t test_vld4_s32(int32_t const *a) { 11393 return vld4_s32(a); 11394 } 11395 11396 // CHECK-LABEL: define %struct.int64x1x4_t @test_vld4_s64(i64* %a) #0 { 11397 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8 11398 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8 11399 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* 11400 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 11401 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>* 11402 // CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]]) 11403 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* 11404 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] 11405 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8* 11406 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* 11407 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 11408 // CHECK: [[TMP6:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8 11409 // CHECK: ret %struct.int64x1x4_t [[TMP6]] 11410 int64x1x4_t test_vld4_s64(int64_t const *a) { 11411 return vld4_s64(a); 11412 } 11413 11414 // CHECK-LABEL: define %struct.float16x4x4_t @test_vld4_f16(half* %a) #0 { 11415 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8 11416 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8 11417 // CHECK: [[TMP0:%.*]] = bitcast 
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8
// CHECK: ret %struct.float16x4x4_t [[TMP6]]
float16x4x4_t test_vld4_f16(float16_t const *a) {
  return vld4_f16(a);
}

// CHECK-LABEL: define %struct.float32x2x4_t @test_vld4_f32(float* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
// CHECK: [[VLD4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0v2f32(<2 x float>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8
// CHECK: ret %struct.float32x2x4_t [[TMP6]]
float32x2x4_t test_vld4_f32(float32_t const *a) {
  return vld4_f32(a);
}

// CHECK-LABEL: define %struct.float64x1x4_t @test_vld4_f64(double* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x double>*
// CHECK: [[VLD4:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0v1f64(<1 x double>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double>, <1 x double> }*
// CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8
// CHECK: ret %struct.float64x1x4_t [[TMP6]]
float64x1x4_t test_vld4_f64(float64_t const *a) {
  return vld4_f64(a);
}

// CHECK-LABEL: define %struct.poly8x8x4_t @test_vld4_p8(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0v8i8(<8 x i8>* [[TMP1]])
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 8, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8
// CHECK: ret %struct.poly8x8x4_t [[TMP5]]
poly8x8x4_t test_vld4_p8(poly8_t const *a) {
  return vld4_p8(a);
}

// CHECK-LABEL: define %struct.poly16x4x4_t @test_vld4_p16(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i16>*
// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8
// CHECK: ret %struct.poly16x4x4_t [[TMP6]]
poly16x4x4_t test_vld4_p16(poly16_t const *a) {
  return vld4_p16(a);
}
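
// Illustrative usage sketch for the vld4 family tested above (this helper is
// hypothetical, not part of the checked coverage; being an unused static
// inline function, it should emit no IR and so should not disturb FileCheck):
// vld4 performs a de-interleaving load, splitting eight consecutive words
// into four 2-lane vectors, one per interleaved stream.
static inline uint32x2_t vld4_first_stream(const uint32_t *p) {
  // After the load, lane j of v.val[i] holds p[4*j + i],
  // i.e. v.val[0] = { p[0], p[4] }, v.val[1] = { p[1], p[5] }, and so on.
  uint32x2x4_t v = vld4_u32(p);
  return v.val[0];
}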

// CHECK-LABEL: define void @test_vst1q_u8(i8* %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK: store <16 x i8> %b, <16 x i8>* [[TMP0]]
// CHECK: ret void
void test_vst1q_u8(uint8_t *a, uint8x16_t b) {
  vst1q_u8(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u16(i16* %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
// CHECK: ret void
void test_vst1q_u16(uint16_t *a, uint16x8_t b) {
  vst1q_u16(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u32(i32* %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP2]]
// CHECK: ret void
void test_vst1q_u32(uint32_t *a, uint32x4_t b) {
  vst1q_u32(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u64(i64* %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP2]]
// CHECK: ret void
void test_vst1q_u64(uint64_t *a, uint64x2_t b) {
  vst1q_u64(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s8(i8* %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK: store <16 x i8> %b, <16 x i8>* [[TMP0]]
// CHECK: ret void
void test_vst1q_s8(int8_t *a, int8x16_t b) {
  vst1q_s8(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s16(i16* %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
// CHECK: ret void
void test_vst1q_s16(int16_t *a, int16x8_t b) {
  vst1q_s16(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s32(i32* %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP2]]
// CHECK: ret void
void test_vst1q_s32(int32_t *a, int32x4_t b) {
  vst1q_s32(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s64(i64* %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP2]]
// CHECK: ret void
void test_vst1q_s64(int64_t *a, int64x2_t b) {
  vst1q_s64(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f16(half* %a, <8 x half> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
// CHECK: ret void
void test_vst1q_f16(float16_t *a, float16x8_t b) {
  vst1q_f16(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f32(float* %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: store <4 x float> [[TMP3]], <4 x float>* [[TMP2]]
// CHECK: ret void
void test_vst1q_f32(float32_t *a, float32x4_t b) {
  vst1q_f32(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f64(double* %a, <2 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x double>*
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
// CHECK: store <2 x double> [[TMP3]], <2 x double>* [[TMP2]]
// CHECK: ret void
void test_vst1q_f64(float64_t *a, float64x2_t b) {
  vst1q_f64(a, b);
}

// CHECK-LABEL: define void @test_vst1q_p8(i8* %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <16 x i8>*
// CHECK: store <16 x i8> %b, <16 x i8>* [[TMP0]]
// CHECK: ret void
void test_vst1q_p8(poly8_t *a, poly8x16_t b) {
  vst1q_p8(a, b);
}

// CHECK-LABEL: define void @test_vst1q_p16(i16* %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP2]]
// CHECK: ret void
void test_vst1q_p16(poly16_t *a, poly16x8_t b) {
  vst1q_p16(a, b);
}

// CHECK-LABEL: define void @test_vst1_u8(i8* %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK: store <8 x i8> %b, <8 x i8>* [[TMP0]]
// CHECK: ret void
void test_vst1_u8(uint8_t *a, uint8x8_t b) {
  vst1_u8(a, b);
}

// CHECK-LABEL: define void @test_vst1_u16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
// CHECK: ret void
void test_vst1_u16(uint16_t *a, uint16x4_t b) {
  vst1_u16(a, b);
}

// CHECK-LABEL: define void @test_vst1_u32(i32* %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: store <2 x i32> [[TMP3]], <2 x i32>* [[TMP2]]
// CHECK: ret void
void test_vst1_u32(uint32_t *a, uint32x2_t b) {
  vst1_u32(a, b);
}

// CHECK-LABEL: define void @test_vst1_u64(i64* %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: store <1 x i64> [[TMP3]], <1 x i64>* [[TMP2]]
// CHECK: ret void
void test_vst1_u64(uint64_t *a, uint64x1_t b) {
  vst1_u64(a, b);
}
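
// Illustrative usage sketch for the vst1/vst1q family (a hypothetical helper,
// deliberately not referenced by any CHECK lines; unused static inline
// functions should emit no IR, leaving the checked output unchanged):
// vst1q is a plain contiguous store of one q register, so a splat-and-store
// fill loop should lower to roughly a dup followed by repeated str.
static inline void vst1q_fill_u32(uint32_t *dst, uint32_t value, int quads) {
  uint32x4_t v = vdupq_n_u32(value); // broadcast value into all four lanes
  for (int i = 0; i != quads; ++i)
    vst1q_u32(dst + 4 * i, v);       // store four words per iteration
}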

// CHECK-LABEL: define void @test_vst1_s8(i8* %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK: store <8 x i8> %b, <8 x i8>* [[TMP0]]
// CHECK: ret void
void test_vst1_s8(int8_t *a, int8x8_t b) {
  vst1_s8(a, b);
}

// CHECK-LABEL: define void @test_vst1_s16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
// CHECK: ret void
void test_vst1_s16(int16_t *a, int16x4_t b) {
  vst1_s16(a, b);
}

// CHECK-LABEL: define void @test_vst1_s32(i32* %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: store <2 x i32> [[TMP3]], <2 x i32>* [[TMP2]]
// CHECK: ret void
void test_vst1_s32(int32_t *a, int32x2_t b) {
  vst1_s32(a, b);
}

// CHECK-LABEL: define void @test_vst1_s64(i64* %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: store <1 x i64> [[TMP3]], <1 x i64>* [[TMP2]]
// CHECK: ret void
void test_vst1_s64(int64_t *a, int64x1_t b) {
  vst1_s64(a, b);
}

// CHECK-LABEL: define void @test_vst1_f16(half* %a, <4 x half> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
// CHECK: ret void
void test_vst1_f16(float16_t *a, float16x4_t b) {
  vst1_f16(a, b);
}

// CHECK-LABEL: define void @test_vst1_f32(float* %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: store <2 x float> [[TMP3]], <2 x float>* [[TMP2]]
// CHECK: ret void
void test_vst1_f32(float32_t *a, float32x2_t b) {
  vst1_f32(a, b);
}

// CHECK-LABEL: define void @test_vst1_f64(double* %a, <1 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast double* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x double>*
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK: store <1 x double> [[TMP3]], <1 x double>* [[TMP2]]
// CHECK: ret void
void test_vst1_f64(float64_t *a, float64x1_t b) {
  vst1_f64(a, b);
}

// CHECK-LABEL: define void @test_vst1_p8(i8* %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i8* %a to <8 x i8>*
// CHECK: store <8 x i8> %b, <8 x i8>* [[TMP0]]
// CHECK: ret void
void test_vst1_p8(poly8_t *a, poly8x8_t b) {
  vst1_p8(a, b);
}

// CHECK-LABEL: define void @test_vst1_p16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: store <4 x i16> [[TMP3]], <4 x i16>* [[TMP2]]
// CHECK: ret void
void test_vst1_p16(poly16_t *a, poly16x4_t b) {
  vst1_p16(a, b);
}
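
// The vst2 tests below exercise interleaving stores, the inverse of vld2.
// As a hypothetical usage sketch only (not referenced by any CHECK lines, and
// unused static inline functions should emit no IR): an interleaving store
// zips the two member vectors lane by lane, e.g. packing separate real and
// imaginary arrays into one interleaved complex buffer.
static inline void vst2_zip_complex(float *dst, float32x2_t re, float32x2_t im) {
  float32x2x2_t pair;
  pair.val[0] = re; // written to dst[0] and dst[2]
  pair.val[1] = im; // written to dst[1] and dst[3]
  vst2_f32(dst, pair);
}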

// CHECK-LABEL: define void @test_vst2q_u8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
// CHECK: ret void
void test_vst2q_u8(uint8_t *a, uint8x16x2_t b) {
  vst2q_u8(a, b);
}

// CHECK-LABEL: define void @test_vst2q_u16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
void test_vst2q_u16(uint16_t *a, uint16x8x2_t b) {
  vst2q_u16(a, b);
}

// CHECK-LABEL: define void @test_vst2q_u32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: call void @llvm.aarch64.neon.st2.v4i32.p0i8(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
void test_vst2q_u32(uint32_t *a, uint32x4x2_t b) {
  vst2q_u32(a, b);
}

// CHECK-LABEL: define void @test_vst2q_u64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: call void @llvm.aarch64.neon.st2.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
void test_vst2q_u64(uint64_t *a, uint64x2x2_t b) {
  vst2q_u64(a, b);
}

// CHECK-LABEL: define void @test_vst2q_s8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
// CHECK: ret void
void test_vst2q_s8(int8_t *a, int8x16x2_t b) {
  vst2q_s8(a, b);
}

// CHECK-LABEL: define void @test_vst2q_s16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
void test_vst2q_s16(int16_t *a, int16x8x2_t b) {
  vst2q_s16(a, b);
}

// CHECK-LABEL: define void @test_vst2q_s32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: call void @llvm.aarch64.neon.st2.v4i32.p0i8(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
void test_vst2q_s32(int32_t *a, int32x4x2_t b) {
  vst2q_s32(a, b);
}

// CHECK-LABEL: define void @test_vst2q_s64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int64x2x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: call void @llvm.aarch64.neon.st2.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
void test_vst2q_s64(int64_t *a, int64x2x2_t b) {
  vst2q_s64(a, b);
}

// CHECK-LABEL: define void @test_vst2q_f16(half* %a, [2 x <8 x half>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
void test_vst2q_f16(float16_t *a, float16x8x2_t b) {
  vst2q_f16(a, b);
}

// CHECK-LABEL: define void @test_vst2q_f32(float* %a, [2 x <4 x float>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK: call void @llvm.aarch64.neon.st2.v4f32.p0i8(<4 x float> [[TMP7]], <4 x float> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
void test_vst2q_f32(float32_t *a, float32x4x2_t b) {
  vst2q_f32(a, b);
}

// CHECK-LABEL: define void @test_vst2q_f64(double* %a, [2 x <2 x double>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
// CHECK: call void @llvm.aarch64.neon.st2.v2f64.p0i8(<2 x double> [[TMP7]], <2 x double> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
void test_vst2q_f64(float64_t *a, float64x2x2_t b) {
  vst2q_f64(a, b);
}

// CHECK-LABEL: define void @test_vst2q_p8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
// CHECK: ret void
void test_vst2q_p8(poly8_t *a, poly8x16x2_t b) {
  vst2q_p8(a, b);
}

// CHECK-LABEL: define void @test_vst2q_p16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
void test_vst2q_p16(poly16_t *a, poly16x8x2_t b) {
  vst2q_p16(a, b);
}

// CHECK-LABEL: define void @test_vst2_u8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
// CHECK: ret void
void test_vst2_u8(uint8_t *a, uint8x8x2_t b) {
  vst2_u8(a, b);
}

// CHECK-LABEL: define void @test_vst2_u16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
void test_vst2_u16(uint16_t *a, uint16x4x2_t b) {
  vst2_u16(a, b);
}

// CHECK-LABEL: define void @test_vst2_u32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: call void @llvm.aarch64.neon.st2.v2i32.p0i8(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
void test_vst2_u32(uint32_t *a, uint32x2x2_t b) {
  vst2_u32(a, b);
}

// CHECK-LABEL: define void @test_vst2_u64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
void test_vst2_u64(uint64_t *a, uint64x1x2_t b) {
  vst2_u64(a, b);
}

// CHECK-LABEL: define void @test_vst2_s8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
// CHECK: ret void
void test_vst2_s8(int8_t *a, int8x8x2_t b) {
  vst2_s8(a, b);
}

// CHECK-LABEL: define void @test_vst2_s16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
void test_vst2_s16(int16_t *a, int16x4x2_t b) {
  vst2_s16(a, b);
}

// CHECK-LABEL: define void @test_vst2_s32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: call void @llvm.aarch64.neon.st2.v2i32.p0i8(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
void test_vst2_s32(int32_t *a, int32x2x2_t b) {
  vst2_s32(a, b);
}

// CHECK-LABEL: define void @test_vst2_s64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
void test_vst2_s64(int64_t *a, int64x1x2_t b) {
  vst2_s64(a, b);
}

// CHECK-LABEL: define void @test_vst2_f16(half* %a, [2 x <4 x half>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
void test_vst2_f16(float16_t *a, float16x4x2_t b) {
  vst2_f16(a, b);
}

// CHECK-LABEL: define void @test_vst2_f32(float* %a, [2 x <2 x float>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK: call void @llvm.aarch64.neon.st2.v2f32.p0i8(<2 x float> [[TMP7]], <2 x float> [[TMP8]], i8* [[TMP2]])

// CHECK-LABEL: define void @test_vst2_f64(double* %a, [2 x <1 x double>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK: call void @llvm.aarch64.neon.st2.v1f64.p0i8(<1 x double> [[TMP7]], <1 x double> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
void test_vst2_f64(float64_t *a, float64x1x2_t b) {
  vst2_f64(a, b);
}

// CHECK-LABEL: define void @test_vst2_p8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
// CHECK: ret void
void test_vst2_p8(poly8_t *a, poly8x8x2_t b) {
  vst2_p8(a, b);
}

// CHECK-LABEL: define void @test_vst2_p16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
void test_vst2_p16(poly16_t *a, poly16x4x2_t b) {
  vst2_p16(a, b);
}
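
// Illustrative usage sketch (a hypothetical helper, not part of the
// FileCheck-verified tests above): vst2 performs an interleaved store,
// so the two registers below land in memory as lo[0], hi[0], lo[1],
// hi[1]. `buf` is assumed to have room for four int32_t values.
static inline void example_vst2_interleave(int32_t *buf, int32x2_t lo,
                                           int32x2_t hi) {
  int32x2x2_t pair = { { lo, hi } };
  vst2_s32(buf, pair); // buf = { lo[0], hi[0], lo[1], hi[1] }
}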

// CHECK-LABEL: define void @test_vst3q_u8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
// CHECK: ret void
void test_vst3q_u8(uint8_t *a, uint8x16x3_t b) {
  vst3q_u8(a, b);
}

// CHECK-LABEL: define void @test_vst3q_u16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3q_u16(uint16_t *a, uint16x8x3_t b) {
  vst3q_u16(a, b);
}

// CHECK-LABEL: define void @test_vst3q_u32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK: call void @llvm.aarch64.neon.st3.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3q_u32(uint32_t *a, uint32x4x3_t b) {
  vst3q_u32(a, b);
}

// CHECK-LABEL: define void @test_vst3q_u64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3q_u64(uint64_t *a, uint64x2x3_t b) {
  vst3q_u64(a, b);
}

// CHECK-LABEL: define void @test_vst3q_s8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
// CHECK: ret void
void test_vst3q_s8(int8_t *a, int8x16x3_t b) {
  vst3q_s8(a, b);
}

// CHECK-LABEL: define void @test_vst3q_s16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3q_s16(int16_t *a, int16x8x3_t b) {
  vst3q_s16(a, b);
}

// CHECK-LABEL: define void @test_vst3q_s32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK: call void @llvm.aarch64.neon.st3.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3q_s32(int32_t *a, int32x4x3_t b) {
  vst3q_s32(a, b);
}

// CHECK-LABEL: define void @test_vst3q_s64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3q_s64(int64_t *a, int64x2x3_t b) {
  vst3q_s64(a, b);
}

// CHECK-LABEL: define void @test_vst3q_f16(half* %a, [3 x <8 x half>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3q_f16(float16_t *a, float16x8x3_t b) {
  vst3q_f16(a, b);
}

// CHECK-LABEL: define void @test_vst3q_f32(float* %a, [3 x <4 x float>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
// CHECK: call void @llvm.aarch64.neon.st3.v4f32.p0i8(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3q_f32(float32_t *a, float32x4x3_t b) {
  vst3q_f32(a, b);
}

// CHECK-LABEL: define void @test_vst3q_f64(double* %a, [3 x <2 x double>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
// CHECK: call void @llvm.aarch64.neon.st3.v2f64.p0i8(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3q_f64(float64_t *a, float64x2x3_t b) {
  vst3q_f64(a, b);
}

// CHECK-LABEL: define void @test_vst3q_p8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
// CHECK: ret void
void test_vst3q_p8(poly8_t *a, poly8x16x3_t b) {
  vst3q_p8(a, b);
}

// CHECK-LABEL: define void @test_vst3q_p16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3q_p16(poly16_t *a, poly16x8x3_t b) {
  vst3q_p16(a, b);
}
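
// Illustrative usage sketch (a hypothetical helper, not checked above):
// vst3q is a natural fit for planar-to-packed conversion, e.g. writing
// 48 interleaved RGB bytes from three separate 16-lane channel vectors.
// `dst` is assumed to have room for 48 bytes.
static inline void example_vst3q_pack_rgb(uint8_t *dst, uint8x16_t r,
                                          uint8x16_t g, uint8x16_t b) {
  uint8x16x3_t rgb = { { r, g, b } };
  vst3q_u8(dst, rgb); // dst = { r0, g0, b0, r1, g1, b1, ... }
}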

// CHECK-LABEL: define void @test_vst3_u8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
// CHECK: ret void
void test_vst3_u8(uint8_t *a, uint8x8x3_t b) {
  vst3_u8(a, b);
}

// CHECK-LABEL: define void @test_vst3_u16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3_u16(uint16_t *a, uint16x4x3_t b) {
  vst3_u16(a, b);
}

// CHECK-LABEL: define void @test_vst3_u32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: call void @llvm.aarch64.neon.st3.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3_u32(uint32_t *a, uint32x2x3_t b) {
  vst3_u32(a, b);
}

// CHECK-LABEL: define void @test_vst3_u64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3_u64(uint64_t *a, uint64x1x3_t b) {
  vst3_u64(a, b);
}

// CHECK-LABEL: define void @test_vst3_s8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
// CHECK: ret void
void test_vst3_s8(int8_t *a, int8x8x3_t b) {
  vst3_s8(a, b);
}

// CHECK-LABEL: define void @test_vst3_s16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3_s16(int16_t *a, int16x4x3_t b) {
  vst3_s16(a, b);
}

// CHECK-LABEL: define void @test_vst3_s32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: call void @llvm.aarch64.neon.st3.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3_s32(int32_t *a, int32x2x3_t b) {
  vst3_s32(a, b);
}

// CHECK-LABEL: define void @test_vst3_s64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3_s64(int64_t *a, int64x1x3_t b) {
  vst3_s64(a, b);
}

// CHECK-LABEL: define void @test_vst3_f16(half* %a, [3 x <4 x half>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3_f16(float16_t *a, float16x4x3_t b) {
  vst3_f16(a, b);
}

// CHECK-LABEL: define void @test_vst3_f32(float* %a, [3 x <2 x float>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
// CHECK: call void @llvm.aarch64.neon.st3.v2f32.p0i8(<2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3_f32(float32_t *a, float32x2x3_t b) {
  vst3_f32(a, b);
}

// CHECK-LABEL: define void @test_vst3_f64(double* %a, [3 x <1 x double>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
// CHECK: call void @llvm.aarch64.neon.st3.v1f64.p0i8(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3_f64(float64_t *a, float64x1x3_t b) {
  vst3_f64(a, b);
}

// CHECK-LABEL: define void @test_vst3_p8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
// CHECK: ret void
void test_vst3_p8(poly8_t *a, poly8x8x3_t b) {
  vst3_p8(a, b);
}

// CHECK-LABEL: define void @test_vst3_p16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3_p16(poly16_t *a, poly16x4x3_t b) {
  vst3_p16(a, b);
}
13115 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 13116 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 13117 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2 13118 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 13119 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 13120 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 13121 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 13122 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 13123 // CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]]) 13124 // CHECK: ret void 13125 void test_vst3_p16(poly16_t *a, poly16x4x3_t b) { 13126 vst3_p16(a, b); 13127 } 13128 13129 // CHECK-LABEL: define void @test_vst4q_u8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 { 13130 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16 13131 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16 13132 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0 13133 // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 13134 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8* 13135 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8* 13136 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 13137 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 13138 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 13139 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 13140 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 13141 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1 13142 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 13143 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 13144 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2 13145 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 13146 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 13147 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3 13148 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16 13149 // CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a) 13150 // CHECK: ret void 13151 void test_vst4q_u8(uint8_t *a, uint8x16x4_t b) { 13152 vst4q_u8(a, b); 13153 } 13154 13155 // CHECK-LABEL: define void @test_vst4q_u16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 { 13156 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16 13157 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16 13158 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0 13159 // CHECK: store [4 x <8 x i16>] 
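
// The vst4/vst4q tests below follow the same lowering shape as the vst3
// tests above: the NEON aggregate arrives as an [N x <M x T>] coerce
// parameter, is spilled to a local, memcpy'd into a shadow copy, and each
// element is reloaded (bitcast through a byte vector where needed) before
// the call to the llvm.aarch64.neon.stN intrinsic.
//
// Illustrative usage sketch, not exercised by any CHECK line; the helper
// name is hypothetical. vst4q_u16 performs a 4-way interleaved store:
// lane 0 of each of the four registers is written first, then lane 1, etc.
void store_interleaved_u16(uint16_t *dst, uint16x8_t r0, uint16x8_t r1,
                           uint16x8_t r2, uint16x8_t r3) {
  uint16x8x4_t q = {{r0, r1, r2, r3}};
  // Memory layout after the call: r0[0], r1[0], r2[0], r3[0], r0[1], ...
  vst4q_u16(dst, q);
}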

// CHECK-LABEL: define void @test_vst4q_u8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
// CHECK: ret void
void test_vst4q_u8(uint8_t *a, uint8x16x4_t b) {
  vst4q_u8(a, b);
}

// CHECK-LABEL: define void @test_vst4q_u16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_u16(uint16_t *a, uint16x8x4_t b) {
  vst4q_u16(a, b);
}

// CHECK-LABEL: define void @test_vst4q_u32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
// CHECK: call void @llvm.aarch64.neon.st4.v4i32.p0i8(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_u32(uint32_t *a, uint32x4x4_t b) {
  vst4q_u32(a, b);
}

// CHECK-LABEL: define void @test_vst4q_u64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
// CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_u64(uint64_t *a, uint64x2x4_t b) {
  vst4q_u64(a, b);
}
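
// The signed q-register variants below are checked against IR that is
// identical to the unsigned variants above (st4 carries no signedness);
// only the C-level struct and element types in the signatures differ.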

// CHECK-LABEL: define void @test_vst4q_s8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
// CHECK: ret void
void test_vst4q_s8(int8_t *a, int8x16x4_t b) {
  vst4q_s8(a, b);
}

// CHECK-LABEL: define void @test_vst4q_s16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_s16(int16_t *a, int16x8x4_t b) {
  vst4q_s16(a, b);
}

// CHECK-LABEL: define void @test_vst4q_s32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
// CHECK: call void @llvm.aarch64.neon.st4.v4i32.p0i8(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_s32(int32_t *a, int32x4x4_t b) {
  vst4q_s32(a, b);
}

// CHECK-LABEL: define void @test_vst4q_s64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
// CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_s64(int64_t *a, int64x2x4_t b) {
  vst4q_s64(a, b);
}
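
// For f16 the checked IR never operates on half lanes directly: at this
// point the intrinsics treat float16 as storage-only, so the <8 x half>
// values are moved through <16 x i8> and <8 x i16> bitcasts and the store
// is issued as st4.v8i16.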

// CHECK-LABEL: define void @test_vst4q_f16(half* %a, [4 x <8 x half>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_f16(float16_t *a, float16x8x4_t b) {
  vst4q_f16(a, b);
}

// CHECK-LABEL: define void @test_vst4q_f32(float* %a, [4 x <4 x float>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
// CHECK: call void @llvm.aarch64.neon.st4.v4f32.p0i8(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_f32(float32_t *a, float32x4x4_t b) {
  vst4q_f32(a, b);
}

// CHECK-LABEL: define void @test_vst4q_f64(double* %a, [4 x <2 x double>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
// CHECK: call void @llvm.aarch64.neon.st4.v2f64.p0i8(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_f64(float64_t *a, float64x2x4_t b) {
  vst4q_f64(a, b);
}

// CHECK-LABEL: define void @test_vst4q_p8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
// CHECK: ret void
void test_vst4q_p8(poly8_t *a, poly8x16x4_t b) {
  vst4q_p8(a, b);
}

// CHECK-LABEL: define void @test_vst4q_p16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_p16(poly16_t *a, poly16x8x4_t b) {
  vst4q_p16(a, b);
}
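
// The remaining tests cover the 64-bit d-register forms: the x4 structs
// are 8-byte aligned and 32 bytes in size, so the shadow copy is a 32-byte
// memcpy and the st4 calls use the 64-bit vector variants (v8i8, v4i16,
// v2i32, v1i64, v2f32, and v4i16 again for f16).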

// CHECK-LABEL: define void @test_vst4_u8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
// CHECK: ret void
void test_vst4_u8(uint8_t *a, uint8x8x4_t b) {
  vst4_u8(a, b);
}

// CHECK-LABEL: define void @test_vst4_u16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4_u16(uint16_t *a, uint16x4x4_t b) {
  vst4_u16(a, b);
}

// CHECK-LABEL: define void @test_vst4_u32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK: call void @llvm.aarch64.neon.st4.v2i32.p0i8(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4_u32(uint32_t *a, uint32x2x4_t b) {
  vst4_u32(a, b);
}

// CHECK-LABEL: define void @test_vst4_u64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
// CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4_u64(uint64_t *a, uint64x1x4_t b) {
  vst4_u64(a, b);
}

// CHECK-LABEL: define void @test_vst4_s8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
// CHECK: ret void
void test_vst4_s8(int8_t *a, int8x8x4_t b) {
  vst4_s8(a, b);
}

// CHECK-LABEL: define void @test_vst4_s16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4_s16(int16_t *a, int16x4x4_t b) {
  vst4_s16(a, b);
}

// CHECK-LABEL: define void @test_vst4_s32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK: call void @llvm.aarch64.neon.st4.v2i32.p0i8(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4_s32(int32_t *a, int32x2x4_t b) {
  vst4_s32(a, b);
}

// CHECK-LABEL: define void @test_vst4_s64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
// CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4_s64(int64_t *a, int64x1x4_t b) {
  vst4_s64(a, b);
}

// CHECK-LABEL: define void @test_vst4_f16(half* %a, [4 x <4 x half>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4_f16(float16_t *a, float16x4x4_t b) {
  vst4_f16(a, b);
}

// CHECK-LABEL: define void @test_vst4_f32(float* %a, [4 x <2 x float>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
// CHECK: call void @llvm.aarch64.neon.st4.v2f32.p0i8(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void
test_vst4_f32(float32_t *a, float32x2x4_t b) { 13886 vst4_f32(a, b); 13887 } 13888 13889 // CHECK-LABEL: define void @test_vst4_f64(double* %a, [4 x <1 x double>] %b.coerce) #0 { 13890 // CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8 13891 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8 13892 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0 13893 // CHECK: store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8 13894 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8* 13895 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8* 13896 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 13897 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* 13898 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 13899 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0 13900 // CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8 13901 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> 13902 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 13903 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL1]], i64 0, i64 1 13904 // CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8 13905 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> 13906 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 13907 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL3]], i64 0, i64 2 13908 // CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8 13909 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8> 13910 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 13911 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL5]], i64 0, i64 3 13912 // CHECK: [[TMP9:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX6]], align 8 13913 // CHECK: [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8> 13914 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> 13915 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> 13916 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double> 13917 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double> 13918 // CHECK: call void @llvm.aarch64.neon.st4.v1f64.p0i8(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], i8* [[TMP2]]) 13919 // CHECK: ret void 13920 void test_vst4_f64(float64_t *a, float64x1x4_t b) { 13921 vst4_f64(a, b); 13922 } 13923 13924 // CHECK-LABEL: define void @test_vst4_p8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 { 13925 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8 13926 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8 13927 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0 13928 // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 13929 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* 
[[__S1]] to i8* 13930 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8* 13931 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 13932 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 13933 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 13934 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 13935 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 13936 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1 13937 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 13938 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 13939 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2 13940 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 13941 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 13942 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3 13943 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 13944 // CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a) 13945 // CHECK: ret void 13946 void test_vst4_p8(poly8_t *a, poly8x8x4_t b) { 13947 vst4_p8(a, b); 13948 } 13949 13950 // CHECK-LABEL: define void @test_vst4_p16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 { 13951 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8 13952 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8 13953 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0 13954 // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 13955 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8* 13956 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8* 13957 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 13958 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 13959 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 13960 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0 13961 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 13962 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 13963 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 13964 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1 13965 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 13966 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 13967 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 13968 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2 13969 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 13970 // CHECK: 
[[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 13971 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 13972 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3 13973 // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 13974 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8> 13975 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 13976 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 13977 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 13978 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> 13979 // CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]]) 13980 // CHECK: ret void 13981 void test_vst4_p16(poly16_t *a, poly16x4x4_t b) { 13982 vst4_p16(a, b); 13983 } 13984 13985 // CHECK-LABEL: define %struct.uint8x16x2_t @test_vld1q_u8_x2(i8* %a) #0 { 13986 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16 13987 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16 13988 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8* 13989 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %a) 13990 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }* 13991 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]] 13992 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8* 13993 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8* 13994 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false) 13995 // CHECK: [[TMP4:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16 13996 // CHECK: ret %struct.uint8x16x2_t [[TMP4]] 13997 uint8x16x2_t test_vld1q_u8_x2(uint8_t const *a) { 13998 return vld1q_u8_x2(a); 13999 } 14000 14001 // CHECK-LABEL: define %struct.uint16x8x2_t @test_vld1q_u16_x2(i16* %a) #0 { 14002 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16 14003 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16 14004 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* 14005 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 14006 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 14007 // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* [[TMP2]]) 14008 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }* 14009 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]] 14010 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8* 14011 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* 14012 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) 14013 // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16 14014 // CHECK: ret %struct.uint16x8x2_t [[TMP6]] 14015 uint16x8x2_t test_vld1q_u16_x2(uint16_t const *a) { 14016 return vld1q_u16_x2(a); 14017 } 14018 14019 // CHECK-LABEL: define %struct.uint32x4x2_t @test_vld1q_u32_x2(i32* %a) #0 { 14020 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16 14021 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16 14022 // 
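// The vld1*_x2 intrinsics below load two whole vectors from consecutive
// memory without de-interleaving (a multi-register LD1, in contrast to the
// even/odd element split performed by vld2).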
// CHECK-LABEL: define %struct.uint8x16x2_t @test_vld1q_u8_x2(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
// CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %a)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false)
// CHECK: [[TMP4:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.uint8x16x2_t [[TMP4]]
uint8x16x2_t test_vld1q_u8_x2(uint8_t const *a) {
  return vld1q_u8_x2(a);
}

// CHECK-LABEL: define %struct.uint16x8x2_t @test_vld1q_u16_x2(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.uint16x8x2_t [[TMP6]]
uint16x8x2_t test_vld1q_u16_x2(uint16_t const *a) {
  return vld1q_u16_x2(a);
}

// CHECK-LABEL: define %struct.uint32x4x2_t @test_vld1q_u32_x2(i32* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.uint32x4x2_t [[TMP6]]
uint32x4x2_t test_vld1q_u32_x2(uint32_t const *a) {
  return vld1q_u32_x2(a);
}

// CHECK-LABEL: define %struct.uint64x2x2_t @test_vld1q_u64_x2(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
// CHECK: store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.uint64x2x2_t [[TMP6]]
uint64x2x2_t test_vld1q_u64_x2(uint64_t const *a) {
  return vld1q_u64_x2(a);
}

// CHECK-LABEL: define %struct.int8x16x2_t @test_vld1q_s8_x2(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
// CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %a)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false)
// CHECK: [[TMP4:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.int8x16x2_t [[TMP4]]
int8x16x2_t test_vld1q_s8_x2(int8_t const *a) {
  return vld1q_s8_x2(a);
}

// CHECK-LABEL: define %struct.int16x8x2_t @test_vld1q_s16_x2(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.int16x8x2_t [[TMP6]]
int16x8x2_t test_vld1q_s16_x2(int16_t const *a) {
  return vld1q_s16_x2(a);
}

// CHECK-LABEL: define %struct.int32x4x2_t @test_vld1q_s32_x2(i32* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.int32x4x2_t [[TMP6]]
int32x4x2_t test_vld1q_s32_x2(int32_t const *a) {
  return vld1q_s32_x2(a);
}

// CHECK-LABEL: define %struct.int64x2x2_t @test_vld1q_s64_x2(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
// CHECK: store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.int64x2x2_t [[TMP6]]
int64x2x2_t test_vld1q_s64_x2(int64_t const *a) {
  return vld1q_s64_x2(a);
}

// CHECK-LABEL: define %struct.float16x8x2_t @test_vld1q_f16_x2(half* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.float16x8x2_t [[TMP6]]
float16x8x2_t test_vld1q_f16_x2(float16_t const *a) {
  return vld1q_f16_x2(a);
}

// CHECK-LABEL: define %struct.float32x4x2_t @test_vld1q_f32_x2(float* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
// CHECK: [[VLD1XN:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float> }*
// CHECK: store { <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.float32x4x2_t [[TMP6]]
float32x4x2_t test_vld1q_f32_x2(float32_t const *a) {
  return vld1q_f32_x2(a);
}

// CHECK-LABEL: define %struct.float64x2x2_t @test_vld1q_f64_x2(double* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
// CHECK: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double> }*
// CHECK: store { <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.float64x2x2_t [[TMP6]]
float64x2x2_t test_vld1q_f64_x2(float64_t const *a) {
  return vld1q_f64_x2(a);
}

// CHECK-LABEL: define %struct.poly8x16x2_t @test_vld1q_p8_x2(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
// CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %a)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false)
// CHECK: [[TMP4:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.poly8x16x2_t [[TMP4]]
poly8x16x2_t test_vld1q_p8_x2(poly8_t const *a) {
  return vld1q_p8_x2(a);
}

// CHECK-LABEL: define %struct.poly16x8x2_t @test_vld1q_p16_x2(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.poly16x8x2_t [[TMP6]]
poly16x8x2_t test_vld1q_p16_x2(poly16_t const *a) {
  return vld1q_p16_x2(a);
}

// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld1q_p64_x2(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
// CHECK: store { <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.poly64x2x2_t [[TMP6]]
poly64x2x2_t test_vld1q_p64_x2(poly64_t const *a) {
  return vld1q_p64_x2(a);
}

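// The d-register (64-bit) variants follow the same pattern, with 8-byte
// alignment and a 16-byte copy out of the temporary instead of 32.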
// CHECK-LABEL: define %struct.uint8x8x2_t @test_vld1_u8_x2(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %a)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false)
// CHECK: [[TMP4:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.uint8x8x2_t [[TMP4]]
uint8x8x2_t test_vld1_u8_x2(uint8_t const *a) {
  return vld1_u8_x2(a);
}

// CHECK-LABEL: define %struct.uint16x4x2_t @test_vld1_u16_x2(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.uint16x4x2_t [[TMP6]]
uint16x4x2_t test_vld1_u16_x2(uint16_t const *a) {
  return vld1_u16_x2(a);
}

// CHECK-LABEL: define %struct.uint32x2x2_t @test_vld1_u32_x2(i32* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.uint32x2x2_t [[TMP6]]
uint32x2x2_t test_vld1_u32_x2(uint32_t const *a) {
  return vld1_u32_x2(a);
}

// CHECK-LABEL: define %struct.uint64x1x2_t @test_vld1_u64_x2(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.uint64x1x2_t [[TMP6]]
uint64x1x2_t test_vld1_u64_x2(uint64_t const *a) {
  return vld1_u64_x2(a);
}

// CHECK-LABEL: define %struct.int8x8x2_t @test_vld1_s8_x2(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %a)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false)
// CHECK: [[TMP4:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.int8x8x2_t [[TMP4]]
int8x8x2_t test_vld1_s8_x2(int8_t const *a) {
  return vld1_s8_x2(a);
}

// CHECK-LABEL: define %struct.int16x4x2_t @test_vld1_s16_x2(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.int16x4x2_t [[TMP6]]
int16x4x2_t test_vld1_s16_x2(int16_t const *a) {
  return vld1_s16_x2(a);
}

// CHECK-LABEL: define %struct.int32x2x2_t @test_vld1_s32_x2(i32* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.int32x2x2_t [[TMP6]]
int32x2x2_t test_vld1_s32_x2(int32_t const *a) {
  return vld1_s32_x2(a);
}

// CHECK-LABEL: define %struct.int64x1x2_t @test_vld1_s64_x2(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.int64x1x2_t [[TMP6]]
int64x1x2_t test_vld1_s64_x2(int64_t const *a) {
  return vld1_s64_x2(a);
}

// CHECK-LABEL: define %struct.float16x4x2_t @test_vld1_f16_x2(half* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.float16x4x2_t [[TMP6]]
float16x4x2_t test_vld1_f16_x2(float16_t const *a) {
  return vld1_f16_x2(a);
}

// CHECK-LABEL: define %struct.float32x2x2_t @test_vld1_f32_x2(float* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
// CHECK: [[VLD1XN:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float> } [[VLD1XN]], { <2 x float>, <2 x float> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.float32x2x2_t [[TMP6]]
float32x2x2_t test_vld1_f32_x2(float32_t const *a) {
  return vld1_f32_x2(a);
}

// CHECK-LABEL: define %struct.float64x1x2_t @test_vld1_f64_x2(double* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
// CHECK: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double> }*
// CHECK: store { <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.float64x1x2_t [[TMP6]]
float64x1x2_t test_vld1_f64_x2(float64_t const *a) {
  return vld1_f64_x2(a);
}

// CHECK-LABEL: define %struct.poly8x8x2_t @test_vld1_p8_x2(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %a)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false)
// CHECK: [[TMP4:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.poly8x8x2_t [[TMP4]]
poly8x8x2_t test_vld1_p8_x2(poly8_t const *a) {
  return vld1_p8_x2(a);
}

// CHECK-LABEL: define %struct.poly16x4x2_t @test_vld1_p16_x2(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.poly16x4x2_t [[TMP6]]
poly16x4x2_t test_vld1_p16_x2(poly16_t const *a) {
  return vld1_p16_x2(a);
}

// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld1_p64_x2(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.poly64x1x2_t [[TMP6]]
poly64x1x2_t test_vld1_p64_x2(poly64_t const *a) {
  return vld1_p64_x2(a);
}

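// The _x3 variants load three consecutive vectors through
// llvm.aarch64.neon.ld1x3; the result struct is 48 bytes in the q-form and
// 24 bytes in the d-form, as the memcpy sizes below reflect.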
// CHECK-LABEL: define %struct.uint8x16x3_t @test_vld1q_u8_x3(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
// CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %a)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false)
// CHECK: [[TMP4:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.uint8x16x3_t [[TMP4]]
uint8x16x3_t test_vld1q_u8_x3(uint8_t const *a) {
  return vld1q_u8_x3(a);
}

// CHECK-LABEL: define %struct.uint16x8x3_t @test_vld1q_u16_x3(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.uint16x8x3_t [[TMP6]]
uint16x8x3_t test_vld1q_u16_x3(uint16_t const *a) {
  return vld1q_u16_x3(a);
}

// CHECK-LABEL: define %struct.uint32x4x3_t @test_vld1q_u32_x3(i32* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.uint32x4x3_t [[TMP6]]
uint32x4x3_t test_vld1q_u32_x3(uint32_t const *a) {
  return vld1q_u32_x3(a);
}

// CHECK-LABEL: define %struct.uint64x2x3_t @test_vld1q_u64_x3(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.uint64x2x3_t [[TMP6]]
uint64x2x3_t test_vld1q_u64_x3(uint64_t const *a) {
  return vld1q_u64_x3(a);
}

// CHECK-LABEL: define %struct.int8x16x3_t @test_vld1q_s8_x3(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
// CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %a)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false)
// CHECK: [[TMP4:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.int8x16x3_t [[TMP4]]
int8x16x3_t test_vld1q_s8_x3(int8_t const *a) {
  return vld1q_s8_x3(a);
}

// CHECK-LABEL: define %struct.int16x8x3_t @test_vld1q_s16_x3(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.int16x8x3_t [[TMP6]]
int16x8x3_t test_vld1q_s16_x3(int16_t const *a) {
  return vld1q_s16_x3(a);
}

// CHECK-LABEL: define %struct.int32x4x3_t @test_vld1q_s32_x3(i32* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
// CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.int32x4x3_t [[TMP6]]
int32x4x3_t test_vld1q_s32_x3(int32_t const *a) {
  return vld1q_s32_x3(a);
}

// CHECK-LABEL: define %struct.int64x2x3_t @test_vld1q_s64_x3(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.int64x2x3_t [[TMP6]]
int64x2x3_t test_vld1q_s64_x3(int64_t const *a) {
  return vld1q_s64_x3(a);
}

// CHECK-LABEL: define %struct.float16x8x3_t @test_vld1q_f16_x3(half* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.float16x8x3_t [[TMP6]]
float16x8x3_t test_vld1q_f16_x3(float16_t const *a) {
  return vld1q_f16_x3(a);
}

// CHECK-LABEL: define %struct.float32x4x3_t @test_vld1q_f32_x3(float* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float*
// CHECK: [[VLD1XN:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float> }*
// CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.float32x4x3_t [[TMP6]]
float32x4x3_t test_vld1q_f32_x3(float32_t const *a) {
  return vld1q_f32_x3(a);
}

// CHECK-LABEL: define %struct.float64x2x3_t @test_vld1q_f64_x3(double* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast double* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double*
// CHECK: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double> }*
// CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.float64x2x3_t [[TMP6]]
float64x2x3_t test_vld1q_f64_x3(float64_t const *a) {
  return vld1q_f64_x3(a);
}

// CHECK-LABEL: define %struct.poly8x16x3_t @test_vld1q_p8_x3(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
// CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %a)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false)
// CHECK: [[TMP4:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.poly8x16x3_t [[TMP4]]
poly8x16x3_t test_vld1q_p8_x3(poly8_t const *a) {
  return vld1q_p8_x3(a);
}

// CHECK-LABEL: define %struct.poly16x8x3_t @test_vld1q_p16_x3(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.poly16x8x3_t [[TMP6]]
poly16x8x3_t test_vld1q_p16_x3(poly16_t const *a) {
  return vld1q_p16_x3(a);
}

// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld1q_p64_x3(i64* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64*
// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.poly64x2x3_t [[TMP6]]
poly64x2x3_t test_vld1q_p64_x3(poly64_t const *a) {
  return vld1q_p64_x3(a);
}

// CHECK-LABEL: define %struct.uint8x8x3_t @test_vld1_u8_x3(i8* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %a)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false)
// CHECK: [[TMP4:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8
// CHECK: ret %struct.uint8x8x3_t [[TMP4]]
uint8x8x3_t test_vld1_u8_x3(uint8_t const *a) {
  return vld1_u8_x3(a);
}

// CHECK-LABEL: define %struct.uint16x4x3_t @test_vld1_u16_x3(i16* %a) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
//
CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 14744 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 14745 // CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* [[TMP2]]) 14746 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* 14747 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 14748 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8* 14749 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* 14750 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 14751 // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8 14752 // CHECK: ret %struct.uint16x4x3_t [[TMP6]] 14753 uint16x4x3_t test_vld1_u16_x3(uint16_t const *a) { 14754 return vld1_u16_x3(a); 14755 } 14756 14757 // CHECK-LABEL: define %struct.uint32x2x3_t @test_vld1_u32_x3(i32* %a) #0 { 14758 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8 14759 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8 14760 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* 14761 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 14762 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* 14763 // CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* [[TMP2]]) 14764 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }* 14765 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] 14766 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8* 14767 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* 14768 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 14769 // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8 14770 // CHECK: ret %struct.uint32x2x3_t [[TMP6]] 14771 uint32x2x3_t test_vld1_u32_x3(uint32_t const *a) { 14772 return vld1_u32_x3(a); 14773 } 14774 14775 // CHECK-LABEL: define %struct.uint64x1x3_t @test_vld1_u64_x3(i64* %a) #0 { 14776 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8 14777 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8 14778 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* 14779 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 14780 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 14781 // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* [[TMP2]]) 14782 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }* 14783 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] 14784 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8* 14785 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* 14786 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 14787 // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8 14788 // CHECK: ret %struct.uint64x1x3_t [[TMP6]] 14789 uint64x1x3_t test_vld1_u64_x3(uint64_t const *a) { 14790 return vld1_u64_x3(a); 14791 } 14792 14793 // CHECK-LABEL: define %struct.int8x8x3_t @test_vld1_s8_x3(i8* %a) #0 { 
14794 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8 14795 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8 14796 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* 14797 // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %a) 14798 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* 14799 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] 14800 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8* 14801 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* 14802 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false) 14803 // CHECK: [[TMP4:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8 14804 // CHECK: ret %struct.int8x8x3_t [[TMP4]] 14805 int8x8x3_t test_vld1_s8_x3(int8_t const *a) { 14806 return vld1_s8_x3(a); 14807 } 14808 14809 // CHECK-LABEL: define %struct.int16x4x3_t @test_vld1_s16_x3(i16* %a) #0 { 14810 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8 14811 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8 14812 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* 14813 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 14814 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 14815 // CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* [[TMP2]]) 14816 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* 14817 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 14818 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8* 14819 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* 14820 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 14821 // CHECK: [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8 14822 // CHECK: ret %struct.int16x4x3_t [[TMP6]] 14823 int16x4x3_t test_vld1_s16_x3(int16_t const *a) { 14824 return vld1_s16_x3(a); 14825 } 14826 14827 // CHECK-LABEL: define %struct.int32x2x3_t @test_vld1_s32_x3(i32* %a) #0 { 14828 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8 14829 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8 14830 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* 14831 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 14832 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* 14833 // CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* [[TMP2]]) 14834 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }* 14835 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] 14836 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8* 14837 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* 14838 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 14839 // CHECK: [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8 14840 // CHECK: ret %struct.int32x2x3_t [[TMP6]] 14841 int32x2x3_t test_vld1_s32_x3(int32_t const *a) { 14842 return vld1_s32_x3(a); 14843 } 14844 14845 // CHECK-LABEL: define 
%struct.int64x1x3_t @test_vld1_s64_x3(i64* %a) #0 { 14846 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8 14847 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8 14848 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8* 14849 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 14850 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 14851 // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* [[TMP2]]) 14852 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }* 14853 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] 14854 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8* 14855 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8* 14856 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 14857 // CHECK: [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8 14858 // CHECK: ret %struct.int64x1x3_t [[TMP6]] 14859 int64x1x3_t test_vld1_s64_x3(int64_t const *a) { 14860 return vld1_s64_x3(a); 14861 } 14862 14863 // CHECK-LABEL: define %struct.float16x4x3_t @test_vld1_f16_x3(half* %a) #0 { 14864 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8 14865 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8 14866 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* 14867 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* 14868 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 14869 // CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* [[TMP2]]) 14870 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* 14871 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 14872 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8* 14873 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* 14874 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 14875 // CHECK: [[TMP6:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8 14876 // CHECK: ret %struct.float16x4x3_t [[TMP6]] 14877 float16x4x3_t test_vld1_f16_x3(float16_t const *a) { 14878 return vld1_f16_x3(a); 14879 } 14880 14881 // CHECK-LABEL: define %struct.float32x2x3_t @test_vld1_f32_x3(float* %a) #0 { 14882 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8 14883 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8 14884 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* 14885 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* 14886 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* 14887 // CHECK: [[VLD1XN:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* [[TMP2]]) 14888 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }* 14889 // CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD1XN]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]] 14890 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8* 14891 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* 14892 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 14893 // 
CHECK: [[TMP6:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8 14894 // CHECK: ret %struct.float32x2x3_t [[TMP6]] 14895 float32x2x3_t test_vld1_f32_x3(float32_t const *a) { 14896 return vld1_f32_x3(a); 14897 } 14898 14899 // CHECK-LABEL: define %struct.float64x1x3_t @test_vld1_f64_x3(double* %a) #0 { 14900 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8 14901 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8 14902 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8* 14903 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8* 14904 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double* 14905 // CHECK: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* [[TMP2]]) 14906 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double> }* 14907 // CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]] 14908 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8* 14909 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8* 14910 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 14911 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8 14912 // CHECK: ret %struct.float64x1x3_t [[TMP6]] 14913 float64x1x3_t test_vld1_f64_x3(float64_t const *a) { 14914 return vld1_f64_x3(a); 14915 } 14916 14917 // CHECK-LABEL: define %struct.poly8x8x3_t @test_vld1_p8_x3(i8* %a) #0 { 14918 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8 14919 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8 14920 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* 14921 // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %a) 14922 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* 14923 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] 14924 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8* 14925 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* 14926 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false) 14927 // CHECK: [[TMP4:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8 14928 // CHECK: ret %struct.poly8x8x3_t [[TMP4]] 14929 poly8x8x3_t test_vld1_p8_x3(poly8_t const *a) { 14930 return vld1_p8_x3(a); 14931 } 14932 14933 // CHECK-LABEL: define %struct.poly16x4x3_t @test_vld1_p16_x3(i16* %a) #0 { 14934 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8 14935 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8 14936 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* 14937 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 14938 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 14939 // CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* [[TMP2]]) 14940 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* 14941 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 14942 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8* 14943 // CHECK: [[TMP5:%.*]] = bitcast 
%struct.poly16x4x3_t* [[__RET]] to i8* 14944 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 14945 // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8 14946 // CHECK: ret %struct.poly16x4x3_t [[TMP6]] 14947 poly16x4x3_t test_vld1_p16_x3(poly16_t const *a) { 14948 return vld1_p16_x3(a); 14949 } 14950 14951 // CHECK-LABEL: define %struct.poly64x1x3_t @test_vld1_p64_x3(i64* %a) #0 { 14952 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8 14953 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8 14954 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8* 14955 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 14956 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 14957 // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* [[TMP2]]) 14958 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }* 14959 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] 14960 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x3_t* [[RETVAL]] to i8* 14961 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8* 14962 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 14963 // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[RETVAL]], align 8 14964 // CHECK: ret %struct.poly64x1x3_t [[TMP6]] 14965 poly64x1x3_t test_vld1_p64_x3(poly64_t const *a) { 14966 return vld1_p64_x3(a); 14967 } 14968 14969 // CHECK-LABEL: define %struct.uint8x16x4_t @test_vld1q_u8_x4(i8* %a) #0 { 14970 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16 14971 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16 14972 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* 14973 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %a) 14974 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* 14975 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] 14976 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8* 14977 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* 14978 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false) 14979 // CHECK: [[TMP4:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16 14980 // CHECK: ret %struct.uint8x16x4_t [[TMP4]] 14981 uint8x16x4_t test_vld1q_u8_x4(uint8_t const *a) { 14982 return vld1q_u8_x4(a); 14983 } 14984 14985 // CHECK-LABEL: define %struct.uint16x8x4_t @test_vld1q_u16_x4(i16* %a) #0 { 14986 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16 14987 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16 14988 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* 14989 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 14990 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 14991 // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* [[TMP2]]) 14992 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* 14993 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 
x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] 14994 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8* 14995 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* 14996 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 14997 // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16 14998 // CHECK: ret %struct.uint16x8x4_t [[TMP6]] 14999 uint16x8x4_t test_vld1q_u16_x4(uint16_t const *a) { 15000 return vld1q_u16_x4(a); 15001 } 15002 15003 // CHECK-LABEL: define %struct.uint32x4x4_t @test_vld1q_u32_x4(i32* %a) #0 { 15004 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16 15005 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16 15006 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* 15007 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 15008 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* 15009 // CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* [[TMP2]]) 15010 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* 15011 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] 15012 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8* 15013 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* 15014 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 15015 // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16 15016 // CHECK: ret %struct.uint32x4x4_t [[TMP6]] 15017 uint32x4x4_t test_vld1q_u32_x4(uint32_t const *a) { 15018 return vld1q_u32_x4(a); 15019 } 15020 15021 // CHECK-LABEL: define %struct.uint64x2x4_t @test_vld1q_u64_x4(i64* %a) #0 { 15022 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16 15023 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16 15024 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8* 15025 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 15026 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 15027 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* [[TMP2]]) 15028 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* 15029 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] 15030 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8* 15031 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8* 15032 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 15033 // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16 15034 // CHECK: ret %struct.uint64x2x4_t [[TMP6]] 15035 uint64x2x4_t test_vld1q_u64_x4(uint64_t const *a) { 15036 return vld1q_u64_x4(a); 15037 } 15038 15039 // CHECK-LABEL: define %struct.int8x16x4_t @test_vld1q_s8_x4(i8* %a) #0 { 15040 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16 15041 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16 15042 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* 15043 // CHECK: [[VLD1XN:%.*]] = call { 
<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %a) 15044 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* 15045 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] 15046 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8* 15047 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* 15048 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false) 15049 // CHECK: [[TMP4:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16 15050 // CHECK: ret %struct.int8x16x4_t [[TMP4]] 15051 int8x16x4_t test_vld1q_s8_x4(int8_t const *a) { 15052 return vld1q_s8_x4(a); 15053 } 15054 15055 // CHECK-LABEL: define %struct.int16x8x4_t @test_vld1q_s16_x4(i16* %a) #0 { 15056 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16 15057 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16 15058 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* 15059 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 15060 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 15061 // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* [[TMP2]]) 15062 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* 15063 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] 15064 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8* 15065 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* 15066 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 15067 // CHECK: [[TMP6:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16 15068 // CHECK: ret %struct.int16x8x4_t [[TMP6]] 15069 int16x8x4_t test_vld1q_s16_x4(int16_t const *a) { 15070 return vld1q_s16_x4(a); 15071 } 15072 15073 // CHECK-LABEL: define %struct.int32x4x4_t @test_vld1q_s32_x4(i32* %a) #0 { 15074 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16 15075 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16 15076 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* 15077 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 15078 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* 15079 // CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* [[TMP2]]) 15080 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* 15081 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] 15082 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8* 15083 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* 15084 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 15085 // CHECK: [[TMP6:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16 15086 // CHECK: ret %struct.int32x4x4_t [[TMP6]] 15087 int32x4x4_t test_vld1q_s32_x4(int32_t const *a) { 15088 return vld1q_s32_x4(a); 15089 } 15090 15091 // CHECK-LABEL: define %struct.int64x2x4_t @test_vld1q_s64_x4(i64* %a) #0 { 15092 // CHECK: [[RETVAL:%.*]] = 
alloca %struct.int64x2x4_t, align 16 15093 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16 15094 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8* 15095 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 15096 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 15097 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* [[TMP2]]) 15098 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* 15099 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] 15100 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8* 15101 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8* 15102 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 15103 // CHECK: [[TMP6:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16 15104 // CHECK: ret %struct.int64x2x4_t [[TMP6]] 15105 int64x2x4_t test_vld1q_s64_x4(int64_t const *a) { 15106 return vld1q_s64_x4(a); 15107 } 15108 15109 // CHECK-LABEL: define %struct.float16x8x4_t @test_vld1q_f16_x4(half* %a) #0 { 15110 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16 15111 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16 15112 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* 15113 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* 15114 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 15115 // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* [[TMP2]]) 15116 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* 15117 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] 15118 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8* 15119 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* 15120 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 15121 // CHECK: [[TMP6:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16 15122 // CHECK: ret %struct.float16x8x4_t [[TMP6]] 15123 float16x8x4_t test_vld1q_f16_x4(float16_t const *a) { 15124 return vld1q_f16_x4(a); 15125 } 15126 15127 // CHECK-LABEL: define %struct.float32x4x4_t @test_vld1q_f32_x4(float* %a) #0 { 15128 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16 15129 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16 15130 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* 15131 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* 15132 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* 15133 // CHECK: [[VLD1XN:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* [[TMP2]]) 15134 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* 15135 // CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD1XN]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]] 15136 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8* 15137 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* 15138 // CHECK: call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 15139 // CHECK: [[TMP6:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16 15140 // CHECK: ret %struct.float32x4x4_t [[TMP6]] 15141 float32x4x4_t test_vld1q_f32_x4(float32_t const *a) { 15142 return vld1q_f32_x4(a); 15143 } 15144 15145 // CHECK-LABEL: define %struct.float64x2x4_t @test_vld1q_f64_x4(double* %a) #0 { 15146 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16 15147 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16 15148 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8* 15149 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8* 15150 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double* 15151 // CHECK: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* [[TMP2]]) 15152 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* 15153 // CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP3]] 15154 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8* 15155 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8* 15156 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 15157 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16 15158 // CHECK: ret %struct.float64x2x4_t [[TMP6]] 15159 float64x2x4_t test_vld1q_f64_x4(float64_t const *a) { 15160 return vld1q_f64_x4(a); 15161 } 15162 15163 // CHECK-LABEL: define %struct.poly8x16x4_t @test_vld1q_p8_x4(i8* %a) #0 { 15164 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16 15165 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16 15166 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* 15167 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %a) 15168 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* 15169 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] 15170 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8* 15171 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* 15172 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false) 15173 // CHECK: [[TMP4:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16 15174 // CHECK: ret %struct.poly8x16x4_t [[TMP4]] 15175 poly8x16x4_t test_vld1q_p8_x4(poly8_t const *a) { 15176 return vld1q_p8_x4(a); 15177 } 15178 15179 // CHECK-LABEL: define %struct.poly16x8x4_t @test_vld1q_p16_x4(i16* %a) #0 { 15180 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16 15181 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16 15182 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* 15183 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 15184 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 15185 // CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* [[TMP2]]) 15186 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x 
i16> }* 15187 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] 15188 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8* 15189 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* 15190 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 15191 // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16 15192 // CHECK: ret %struct.poly16x8x4_t [[TMP6]] 15193 poly16x8x4_t test_vld1q_p16_x4(poly16_t const *a) { 15194 return vld1q_p16_x4(a); 15195 } 15196 15197 // CHECK-LABEL: define %struct.poly64x2x4_t @test_vld1q_p64_x4(i64* %a) #0 { 15198 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16 15199 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16 15200 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8* 15201 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 15202 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 15203 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* [[TMP2]]) 15204 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* 15205 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] 15206 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x4_t* [[RETVAL]] to i8* 15207 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8* 15208 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 15209 // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[RETVAL]], align 16 15210 // CHECK: ret %struct.poly64x2x4_t [[TMP6]] 15211 poly64x2x4_t test_vld1q_p64_x4(poly64_t const *a) { 15212 return vld1q_p64_x4(a); 15213 } 15214 15215 // CHECK-LABEL: define %struct.uint8x8x4_t @test_vld1_u8_x4(i8* %a) #0 { 15216 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8 15217 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8 15218 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* 15219 // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %a) 15220 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* 15221 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] 15222 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8* 15223 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* 15224 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false) 15225 // CHECK: [[TMP4:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8 15226 // CHECK: ret %struct.uint8x8x4_t [[TMP4]] 15227 uint8x8x4_t test_vld1_u8_x4(uint8_t const *a) { 15228 return vld1_u8_x4(a); 15229 } 15230 15231 // CHECK-LABEL: define %struct.uint16x4x4_t @test_vld1_u16_x4(i16* %a) #0 { 15232 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8 15233 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8 15234 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* 15235 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 15236 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 15237 // CHECK: 
[[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* [[TMP2]]) 15238 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* 15239 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 15240 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8* 15241 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* 15242 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 15243 // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8 15244 // CHECK: ret %struct.uint16x4x4_t [[TMP6]] 15245 uint16x4x4_t test_vld1_u16_x4(uint16_t const *a) { 15246 return vld1_u16_x4(a); 15247 } 15248 15249 // CHECK-LABEL: define %struct.uint32x2x4_t @test_vld1_u32_x4(i32* %a) #0 { 15250 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8 15251 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8 15252 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* 15253 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 15254 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* 15255 // CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* [[TMP2]]) 15256 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* 15257 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] 15258 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8* 15259 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* 15260 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 15261 // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8 15262 // CHECK: ret %struct.uint32x2x4_t [[TMP6]] 15263 uint32x2x4_t test_vld1_u32_x4(uint32_t const *a) { 15264 return vld1_u32_x4(a); 15265 } 15266 15267 // CHECK-LABEL: define %struct.uint64x1x4_t @test_vld1_u64_x4(i64* %a) #0 { 15268 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8 15269 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8 15270 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* 15271 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 15272 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 15273 // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* [[TMP2]]) 15274 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* 15275 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] 15276 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8* 15277 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* 15278 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 15279 // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8 15280 // CHECK: ret %struct.uint64x1x4_t [[TMP6]] 15281 uint64x1x4_t test_vld1_u64_x4(uint64_t const *a) { 15282 return vld1_u64_x4(a); 15283 } 15284 15285 // CHECK-LABEL: define %struct.int8x8x4_t @test_vld1_s8_x4(i8* 
%a) #0 { 15286 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8 15287 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8 15288 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* 15289 // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %a) 15290 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* 15291 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] 15292 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8* 15293 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* 15294 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false) 15295 // CHECK: [[TMP4:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8 15296 // CHECK: ret %struct.int8x8x4_t [[TMP4]] 15297 int8x8x4_t test_vld1_s8_x4(int8_t const *a) { 15298 return vld1_s8_x4(a); 15299 } 15300 15301 // CHECK-LABEL: define %struct.int16x4x4_t @test_vld1_s16_x4(i16* %a) #0 { 15302 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8 15303 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8 15304 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* 15305 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 15306 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 15307 // CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* [[TMP2]]) 15308 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* 15309 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 15310 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8* 15311 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* 15312 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 15313 // CHECK: [[TMP6:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8 15314 // CHECK: ret %struct.int16x4x4_t [[TMP6]] 15315 int16x4x4_t test_vld1_s16_x4(int16_t const *a) { 15316 return vld1_s16_x4(a); 15317 } 15318 15319 // CHECK-LABEL: define %struct.int32x2x4_t @test_vld1_s32_x4(i32* %a) #0 { 15320 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8 15321 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8 15322 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* 15323 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 15324 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* 15325 // CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* [[TMP2]]) 15326 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* 15327 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] 15328 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8* 15329 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* 15330 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 15331 // CHECK: [[TMP6:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8 15332 // CHECK: ret %struct.int32x2x4_t 
[[TMP6]] 15333 int32x2x4_t test_vld1_s32_x4(int32_t const *a) { 15334 return vld1_s32_x4(a); 15335 } 15336 15337 // CHECK-LABEL: define %struct.int64x1x4_t @test_vld1_s64_x4(i64* %a) #0 { 15338 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8 15339 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8 15340 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* 15341 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 15342 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 15343 // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* [[TMP2]]) 15344 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* 15345 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] 15346 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8* 15347 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* 15348 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 15349 // CHECK: [[TMP6:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8 15350 // CHECK: ret %struct.int64x1x4_t [[TMP6]] 15351 int64x1x4_t test_vld1_s64_x4(int64_t const *a) { 15352 return vld1_s64_x4(a); 15353 } 15354 15355 // CHECK-LABEL: define %struct.float16x4x4_t @test_vld1_f16_x4(half* %a) #0 { 15356 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8 15357 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8 15358 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* 15359 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* 15360 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 15361 // CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* [[TMP2]]) 15362 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* 15363 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 15364 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8* 15365 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* 15366 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 15367 // CHECK: [[TMP6:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8 15368 // CHECK: ret %struct.float16x4x4_t [[TMP6]] 15369 float16x4x4_t test_vld1_f16_x4(float16_t const *a) { 15370 return vld1_f16_x4(a); 15371 } 15372 15373 // CHECK-LABEL: define %struct.float32x2x4_t @test_vld1_f32_x4(float* %a) #0 { 15374 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8 15375 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8 15376 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* 15377 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* 15378 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* 15379 // CHECK: [[VLD1XN:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* [[TMP2]]) 15380 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* 15381 // CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD1XN]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]] 
15382 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8* 15383 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* 15384 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 15385 // CHECK: [[TMP6:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8 15386 // CHECK: ret %struct.float32x2x4_t [[TMP6]] 15387 float32x2x4_t test_vld1_f32_x4(float32_t const *a) { 15388 return vld1_f32_x4(a); 15389 } 15390 15391 // CHECK-LABEL: define %struct.float64x1x4_t @test_vld1_f64_x4(double* %a) #0 { 15392 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8 15393 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8 15394 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8* 15395 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8* 15396 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double* 15397 // CHECK: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* [[TMP2]]) 15398 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* 15399 // CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]] 15400 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8* 15401 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8* 15402 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 15403 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8 15404 // CHECK: ret %struct.float64x1x4_t [[TMP6]] 15405 float64x1x4_t test_vld1_f64_x4(float64_t const *a) { 15406 return vld1_f64_x4(a); 15407 } 15408 15409 // CHECK-LABEL: define %struct.poly8x8x4_t @test_vld1_p8_x4(i8* %a) #0 { 15410 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8 15411 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8 15412 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* 15413 // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %a) 15414 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* 15415 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] 15416 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8* 15417 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* 15418 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false) 15419 // CHECK: [[TMP4:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8 15420 // CHECK: ret %struct.poly8x8x4_t [[TMP4]] 15421 poly8x8x4_t test_vld1_p8_x4(poly8_t const *a) { 15422 return vld1_p8_x4(a); 15423 } 15424 15425 // CHECK-LABEL: define %struct.poly16x4x4_t @test_vld1_p16_x4(i16* %a) #0 { 15426 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8 15427 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8 15428 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* 15429 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 15430 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 15431 // CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } 
@llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* [[TMP2]]) 15432 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* 15433 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 15434 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8* 15435 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* 15436 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 15437 // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8 15438 // CHECK: ret %struct.poly16x4x4_t [[TMP6]] 15439 poly16x4x4_t test_vld1_p16_x4(poly16_t const *a) { 15440 return vld1_p16_x4(a); 15441 } 15442 15443 // CHECK-LABEL: define %struct.poly64x1x4_t @test_vld1_p64_x4(i64* %a) #0 { 15444 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8 15445 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8 15446 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8* 15447 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 15448 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 15449 // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* [[TMP2]]) 15450 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* 15451 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] 15452 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x4_t* [[RETVAL]] to i8* 15453 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8* 15454 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 15455 // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[RETVAL]], align 8 15456 // CHECK: ret %struct.poly64x1x4_t [[TMP6]] 15457 poly64x1x4_t test_vld1_p64_x4(poly64_t const *a) { 15458 return vld1_p64_x4(a); 15459 } 15460 15461 // CHECK-LABEL: define void @test_vst1q_u8_x2(i8* %a, [2 x <16 x i8>] %b.coerce) #0 { 15462 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16 15463 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16 15464 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0 15465 // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 15466 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8* 15467 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8* 15468 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 15469 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0 15470 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 15471 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 15472 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0 15473 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1 15474 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 15475 // CHECK: call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* 
// CHECK-LABEL: define void @test_vst1q_u8_x2(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
// CHECK: ret void
void test_vst1q_u8_x2(uint8_t *a, uint8x16x2_t b) {
  vst1q_u8_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u16_x2(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i16* [[TMP9]])
// CHECK: ret void
void test_vst1q_u16_x2(uint16_t *a, uint16x8x2_t b) {
  vst1q_u16_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u32_x2(i32* %a, [2 x <4 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK: call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i32* [[TMP9]])
// CHECK: ret void
void test_vst1q_u32_x2(uint32_t *a, uint32x4x2_t b) {
  vst1q_u32_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u64_x2(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK: call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64* [[TMP9]])
// CHECK: ret void
void test_vst1q_u64_x2(uint64_t *a, uint64x2x2_t b) {
  vst1q_u64_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s8_x2(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
// CHECK: ret void
void test_vst1q_s8_x2(int8_t *a, int8x16x2_t b) {
  vst1q_s8_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s16_x2(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i16* [[TMP9]])
// CHECK: ret void
void test_vst1q_s16_x2(int16_t *a, int16x8x2_t b) {
  vst1q_s16_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s32_x2(i32* %a, [2 x <4 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK: call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i32* [[TMP9]])
// CHECK: ret void
void test_vst1q_s32_x2(int32_t *a, int32x4x2_t b) {
  vst1q_s32_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s64_x2(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int64x2x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK: call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64* [[TMP9]])
// CHECK: ret void
void test_vst1q_s64_x2(int64_t *a, int64x2x2_t b) {
  vst1q_s64_x2(a, b);
}

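// Note: in the f16 tests the half vectors never reach a half-typed intrinsic;
// they are bitcast through <16 x i8> to the equivalent integer vectors and
// stored with @llvm.aarch64.neon.st1x2.v8i16 (q form) or .v4i16 (d form).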
// CHECK-LABEL: define void @test_vst1q_f16_x2(half* %a, [2 x <8 x half>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i16* [[TMP9]])
// CHECK: ret void
void test_vst1q_f16_x2(float16_t *a, float16x8x2_t b) {
  vst1q_f16_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f32_x2(float* %a, [2 x <4 x float>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to float*
// CHECK: call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> [[TMP7]], <4 x float> [[TMP8]], float* [[TMP9]])
// CHECK: ret void
void test_vst1q_f32_x2(float32_t *a, float32x4x2_t b) {
  vst1q_f32_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f64_x2(double* %a, [2 x <2 x double>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to double*
// CHECK: call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> [[TMP7]], <2 x double> [[TMP8]], double* [[TMP9]])
// CHECK: ret void
void test_vst1q_f64_x2(float64_t *a, float64x2x2_t b) {
  vst1q_f64_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1q_p8_x2(i8* %a, [2 x <16 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a)
// CHECK: ret void
void test_vst1q_p8_x2(poly8_t *a, poly8x16x2_t b) {
  vst1q_p8_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1q_p16_x2(i16* %a, [2 x <8 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i16* [[TMP9]])
// CHECK: ret void
void test_vst1q_p16_x2(poly16_t *a, poly16x8x2_t b) {
  vst1q_p16_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1q_p64_x2(i64* %a, [2 x <2 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK: call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64* [[TMP9]])
// CHECK: ret void
void test_vst1q_p64_x2(poly64_t *a, poly64x2x2_t b) {
  vst1q_p64_x2(a, b);
}

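// The vst1_<type>_x2 tests below repeat the pattern with 64-bit d registers:
// the coerced array holds 8-byte vectors, the stack copy shrinks to 16 bytes
// with 8-byte alignment, and the stores use the 64-bit
// @llvm.aarch64.neon.st1x2.* variants.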
// CHECK-LABEL: define void @test_vst1_u8_x2(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
// CHECK: ret void
void test_vst1_u8_x2(uint8_t *a, uint8x8x2_t b) {
  vst1_u8_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1_u16_x2(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i16* [[TMP9]])
// CHECK: ret void
void test_vst1_u16_x2(uint16_t *a, uint16x4x2_t b) {
  vst1_u16_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1_u32_x2(i32* %a, [2 x <2 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK: call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i32* [[TMP9]])
// CHECK: ret void
void test_vst1_u32_x2(uint32_t *a, uint32x2x2_t b) {
  vst1_u32_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1_u64_x2(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK: call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64* [[TMP9]])
// CHECK: ret void
void test_vst1_u64_x2(uint64_t *a, uint64x1x2_t b) {
  vst1_u64_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1_s8_x2(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
// CHECK: ret void
void test_vst1_s8_x2(int8_t *a, int8x8x2_t b) {
  vst1_s8_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1_s16_x2(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i16* [[TMP9]])
// CHECK: ret void
void test_vst1_s16_x2(int16_t *a, int16x4x2_t b) {
  vst1_s16_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1_s32_x2(i32* %a, [2 x <2 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK: call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i32* [[TMP9]])
// CHECK: ret void
void test_vst1_s32_x2(int32_t *a, int32x2x2_t b) {
  vst1_s32_x2(a, b);
}

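// For 64-bit element types the d forms use single-lane vectors (<1 x i64>
// below, <1 x double> in the f64 test), so st1x2 still receives two vector
// operands even though each carries only one element.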
// CHECK-LABEL: define void @test_vst1_s64_x2(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK: call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64* [[TMP9]])
// CHECK: ret void
void test_vst1_s64_x2(int64_t *a, int64x1x2_t b) {
  vst1_s64_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1_f16_x2(half* %a, [2 x <4 x half>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i16* [[TMP9]])
// CHECK: ret void
void test_vst1_f16_x2(float16_t *a, float16x4x2_t b) {
  vst1_f16_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1_f32_x2(float* %a, [2 x <2 x float>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to float*
// CHECK: call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> [[TMP7]], <2 x float> [[TMP8]], float* [[TMP9]])
// CHECK: ret void
void test_vst1_f32_x2(float32_t *a, float32x2x2_t b) {
  vst1_f32_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1_f64_x2(double* %a, [2 x <1 x double>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to double*
// CHECK: call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> [[TMP7]], <1 x double> [[TMP8]], double* [[TMP9]])
// CHECK: ret void
void test_vst1_f64_x2(float64_t *a, float64x1x2_t b) {
  vst1_f64_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1_p8_x2(i8* %a, [2 x <8 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a)
// CHECK: ret void
void test_vst1_p8_x2(poly8_t *a, poly8x8x2_t b) {
  vst1_p8_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1_p16_x2(i16* %a, [2 x <4 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i16* [[TMP9]])
// CHECK: ret void
void test_vst1_p16_x2(poly16_t *a, poly16x4x2_t b) {
  vst1_p16_x2(a, b);
}

// CHECK-LABEL: define void @test_vst1_p64_x2(i64* %a, [2 x <1 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK: call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64* [[TMP9]])
// CHECK: ret void
void test_vst1_p64_x2(poly64_t *a, poly64x1x2_t b) {
  vst1_p64_x2(a, b);
}

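// The _x3 variants extend the same lowering to three registers: a coerced
// [3 x <N x T>] array, a 48-byte stack copy for the q forms, three element
// loads, and a call to @llvm.aarch64.neon.st1x3.*. Illustrative use (not
// part of the CHECKed tests):
//
//   void copy48(uint8_t *dst, const uint8_t *src) {
//     vst1q_u8_x3(dst, vld1q_u8_x3(src)); // three q registers, 48 bytes
//   }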
16170 // CHECK: call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a) 16171 // CHECK: ret void 16172 void test_vst1q_u8_x3(uint8_t *a, uint8x16x3_t b) { 16173 vst1q_u8_x3(a, b); 16174 } 16175 16176 // CHECK-LABEL: define void @test_vst1q_u16_x3(i16* %a, [3 x <8 x i16>] %b.coerce) #0 { 16177 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16 16178 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16 16179 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0 16180 // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 16181 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8* 16182 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8* 16183 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 16184 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 16185 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 16186 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0 16187 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 16188 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 16189 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 16190 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1 16191 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 16192 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 16193 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 16194 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2 16195 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 16196 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 16197 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 16198 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 16199 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 16200 // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16* 16201 // CHECK: call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]]) 16202 // CHECK: ret void 16203 void test_vst1q_u16_x3(uint16_t *a, uint16x8x3_t b) { 16204 vst1q_u16_x3(a, b); 16205 } 16206 16207 // CHECK-LABEL: define void @test_vst1q_u32_x3(i32* %a, [3 x <4 x i32>] %b.coerce) #0 { 16208 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16 16209 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16 16210 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0 16211 // CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16 16212 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8* 16213 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8* 16214 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 16215 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 16216 // CHECK: [[VAL:%.*]] = getelementptr inbounds 
%struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK: call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i32* [[TMP12]])
// CHECK: ret void
void test_vst1q_u32_x3(uint32_t *a, uint32x4x3_t b) {
  vst1q_u32_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u64_x3(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK: call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64* [[TMP12]])
// CHECK: ret void
void test_vst1q_u64_x3(uint64_t *a, uint64x2x3_t b) {
  vst1q_u64_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s8_x3(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
// CHECK: ret void
void test_vst1q_s8_x3(int8_t *a, int8x16x3_t b) {
  vst1q_s8_x3(a, b);
}
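
// The vst1*_x3 intrinsics store all three vectors of a NEON triple struct to
// consecutive memory locations through a single st1x3 call, as the IR above
// shows. A minimal usage sketch (illustrative only, not part of the verified
// output; the buffer and values are hypothetical):
//
//   int8_t buf[48];
//   int8x16x3_t v = { { vdupq_n_s8(0), vdupq_n_s8(1), vdupq_n_s8(2) } };
//   vst1q_s8_x3(buf, v);   // buf[0..15] = v.val[0], buf[16..31] = v.val[1],
//                          // buf[32..47] = v.val[2]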

// CHECK-LABEL: define void @test_vst1q_s16_x3(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]])
// CHECK: ret void
void test_vst1q_s16_x3(int16_t *a, int16x8x3_t b) {
  vst1q_s16_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s32_x3(i32* %a, [3 x <4 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK: call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i32* [[TMP12]])
// CHECK: ret void
void test_vst1q_s32_x3(int32_t *a, int32x4x3_t b) {
  vst1q_s32_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s64_x3(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK: call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64* [[TMP12]])
// CHECK: ret void
void test_vst1q_s64_x3(int64_t *a, int64x2x3_t b) {
  vst1q_s64_x3(a, b);
}
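
// Note: in the float16 tests below the half vectors are not passed to the
// intrinsic directly: each <8 x half> value is bitcast through <16 x i8> to
// <8 x i16> and stored with the v8i16 flavor of st1x3 (the d-form float16
// tests later use v4i16 in the same way).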

// CHECK-LABEL: define void @test_vst1q_f16_x3(half* %a, [3 x <8 x half>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]])
// CHECK: ret void
void test_vst1q_f16_x3(float16_t *a, float16x8x3_t b) {
  vst1q_f16_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f32_x3(float* %a, [3 x <4 x float>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to float*
// CHECK: call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], float* [[TMP12]])
// CHECK: ret void
void test_vst1q_f32_x3(float32_t *a, float32x4x3_t b) {
  vst1q_f32_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f64_x3(double* %a, [3 x <2 x double>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to double*
// CHECK: call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], double* [[TMP12]])
// CHECK: ret void
void test_vst1q_f64_x3(float64_t *a, float64x2x3_t b) {
  vst1q_f64_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_p8_x3(i8* %a, [3 x <16 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
// CHECK: ret void
void test_vst1q_p8_x3(poly8_t *a, poly8x16x3_t b) {
  vst1q_p8_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_p16_x3(i16* %a, [3 x <8 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]])
// CHECK: ret void
void test_vst1q_p16_x3(poly16_t *a, poly16x8x3_t b) {
  vst1q_p16_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1q_p64_x3(i64* %a, [3 x <2 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK: call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64* [[TMP12]])
// CHECK: ret void
void test_vst1q_p64_x3(poly64_t *a, poly64x2x3_t b) {
  vst1q_p64_x3(a, b);
}
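
// The tests below switch to the 64-bit (d-register) variants: same structure,
// but the triple occupies 24 bytes (3 x 8) at 8-byte alignment and non-i8
// elements are bitcast through <8 x i8> instead of <16 x i8>. A hypothetical
// usage sketch (buffer and values illustrative only):
//
//   uint16_t buf[12];
//   uint16x4x3_t v = { { vdup_n_u16(0), vdup_n_u16(1), vdup_n_u16(2) } };
//   vst1_u16_x3(buf, v);   // buf[0..3] = v.val[0], buf[4..7] = v.val[1],
//                          // buf[8..11] = v.val[2]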

// CHECK-LABEL: define void @test_vst1_u8_x3(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
// CHECK: ret void
void test_vst1_u8_x3(uint8_t *a, uint8x8x3_t b) {
  vst1_u8_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1_u16_x3(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]])
// CHECK: ret void
void test_vst1_u16_x3(uint16_t *a, uint16x4x3_t b) {
  vst1_u16_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1_u32_x3(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK: call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i32* [[TMP12]])
// CHECK: ret void
void test_vst1_u32_x3(uint32_t *a, uint32x2x3_t b) {
  vst1_u32_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1_u64_x3(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK: call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64* [[TMP12]])
// CHECK: ret void
void test_vst1_u64_x3(uint64_t *a, uint64x1x3_t b) {
  vst1_u64_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1_s8_x3(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
// CHECK: ret void
void test_vst1_s8_x3(int8_t *a, int8x8x3_t b) {
  vst1_s8_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1_s16_x3(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]])
// CHECK: ret void
void test_vst1_s16_x3(int16_t *a, int16x4x3_t b) {
  vst1_s16_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1_s32_x3(i32* %a, [3 x <2 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK: call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i32* [[TMP12]])
// CHECK: ret void
void test_vst1_s32_x3(int32_t *a, int32x2x3_t b) {
  vst1_s32_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1_s64_x3(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK: call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64* [[TMP12]])
// CHECK: ret void
void test_vst1_s64_x3(int64_t *a, int64x1x3_t b) {
  vst1_s64_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1_f16_x3(half* %a, [3 x <4 x half>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]])
// CHECK: ret void
void test_vst1_f16_x3(float16_t *a, float16x4x3_t b) {
  vst1_f16_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1_f32_x3(float* %a, [3 x <2 x float>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to float*
// CHECK: call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], float* [[TMP12]])
// CHECK: ret void
void test_vst1_f32_x3(float32_t *a, float32x2x3_t b) {
  vst1_f32_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1_f64_x3(double* %a, [3 x <1 x double>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to double*
// CHECK: call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], double* [[TMP12]])
// CHECK: ret void
void test_vst1_f64_x3(float64_t *a, float64x1x3_t b) {
  vst1_f64_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1_p8_x3(i8* %a, [3 x <8 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
// CHECK: ret void
void test_vst1_p8_x3(poly8_t *a, poly8x8x3_t b) {
  vst1_p8_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1_p16_x3(i16* %a, [3 x <4 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]])
// CHECK: ret void
void test_vst1_p16_x3(poly16_t *a, poly16x4x3_t b) {
  vst1_p16_x3(a, b);
}

// CHECK-LABEL: define void @test_vst1_p64_x3(i64* %a, [3 x <1 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK: call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64* [[TMP12]])
// CHECK: ret void
void test_vst1_p64_x3(poly64_t *a, poly64x1x3_t b) {
  vst1_p64_x3(a, b);
}
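
// The remaining tests cover the *_x4 variants, which store four vectors with
// st1x4: the q-form struct copy grows to 64 bytes (4 x 16) and a fourth
// getelementptr/load pair (val[3]) appears. A hypothetical usage sketch
// (buffer and values illustrative only):
//
//   uint8_t buf[64];
//   uint8x16x4_t v = { { vdupq_n_u8(0), vdupq_n_u8(1),
//                        vdupq_n_u8(2), vdupq_n_u8(3) } };
//   vst1q_u8_x4(buf, v);   // stores v.val[0] through v.val[3] to buf[0..63]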
CHECK: call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64* [[TMP12]]) 16968 // CHECK: ret void 16969 void test_vst1_p64_x3(poly64_t *a, poly64x1x3_t b) { 16970 vst1_p64_x3(a, b); 16971 } 16972 16973 // CHECK-LABEL: define void @test_vst1q_u8_x4(i8* %a, [4 x <16 x i8>] %b.coerce) #0 { 16974 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16 16975 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16 16976 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0 16977 // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 16978 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8* 16979 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8* 16980 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 16981 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 16982 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 16983 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 16984 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 16985 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1 16986 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 16987 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 16988 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2 16989 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 16990 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 16991 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3 16992 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16 16993 // CHECK: call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a) 16994 // CHECK: ret void 16995 void test_vst1q_u8_x4(uint8_t *a, uint8x16x4_t b) { 16996 vst1q_u8_x4(a, b); 16997 } 16998 16999 // CHECK-LABEL: define void @test_vst1q_u16_x4(i16* %a, [4 x <8 x i16>] %b.coerce) #0 { 17000 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16 17001 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16 17002 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0 17003 // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 17004 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8* 17005 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8* 17006 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 17007 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 17008 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 17009 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0 17010 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x 
// CHECK-LABEL: define void @test_vst1q_u8_x4(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK: call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
// CHECK: ret void
void test_vst1q_u8_x4(uint8_t *a, uint8x16x4_t b) {
  vst1q_u8_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u16_x4(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]])
// CHECK: ret void
void test_vst1q_u16_x4(uint16_t *a, uint16x8x4_t b) {
  vst1q_u16_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u32_x4(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK: call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i32* [[TMP15]])
// CHECK: ret void
void test_vst1q_u32_x4(uint32_t *a, uint32x4x4_t b) {
  vst1q_u32_x4(a, b);
}
// CHECK-LABEL: define void @test_vst1q_u64_x4(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK: call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64* [[TMP15]])
// CHECK: ret void
void test_vst1q_u64_x4(uint64_t *a, uint64x2x4_t b) {
  vst1q_u64_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s8_x4(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK: call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
// CHECK: ret void
void test_vst1q_s8_x4(int8_t *a, int8x16x4_t b) {
  vst1q_s8_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s16_x4(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]])
// CHECK: ret void
void test_vst1q_s16_x4(int16_t *a, int16x8x4_t b) {
  vst1q_s16_x4(a, b);
}
// CHECK-LABEL: define void @test_vst1q_s32_x4(i32* %a, [4 x <4 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK: call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i32* [[TMP15]])
// CHECK: ret void
void test_vst1q_s32_x4(int32_t *a, int32x4x4_t b) {
  vst1q_s32_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s64_x4(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK: call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64* [[TMP15]])
// CHECK: ret void
void test_vst1q_s64_x4(int64_t *a, int64x2x4_t b) {
  vst1q_s64_x4(a, b);
}
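
// There is no half-precision st1x4 form here: <8 x half> values are bitcast
// to <8 x i16> and stored through @llvm.aarch64.neon.st1x4.v8i16.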
// CHECK-LABEL: define void @test_vst1q_f16_x4(half* %a, [4 x <8 x half>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]])
// CHECK: ret void
void test_vst1q_f16_x4(float16_t *a, float16x8x4_t b) {
  vst1q_f16_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f32_x4(float* %a, [4 x <4 x float>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to float*
// CHECK: call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], float* [[TMP15]])
// CHECK: ret void
void test_vst1q_f32_x4(float32_t *a, float32x4x4_t b) {
  vst1q_f32_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f64_x4(double* %a, [4 x <2 x double>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast double* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to double*
// CHECK: call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], double* [[TMP15]])
// CHECK: ret void
void test_vst1q_f64_x4(float64_t *a, float64x2x4_t b) {
  vst1q_f64_x4(a, b);
}
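
// Polynomial element types reuse the integer st1x4 forms (v16i8, v8i16 and v2i64).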
// CHECK-LABEL: define void @test_vst1q_p8_x4(i8* %a, [4 x <16 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK: call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
// CHECK: ret void
void test_vst1q_p8_x4(poly8_t *a, poly8x16x4_t b) {
  vst1q_p8_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1q_p16_x4(i16* %a, [4 x <8 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]])
// CHECK: ret void
void test_vst1q_p16_x4(poly16_t *a, poly16x8x4_t b) {
  vst1q_p16_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1q_p64_x4(i64* %a, [4 x <2 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK: call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64* [[TMP15]])
// CHECK: ret void
void test_vst1q_p64_x4(poly64_t *a, poly64x2x4_t b) {
  vst1q_p64_x4(a, b);
}
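
// The 64-bit (D-register) variants use the same lowering with a 32-byte
// copy of the struct and 8-byte alignment.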
// CHECK-LABEL: define void @test_vst1_u8_x4(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
// CHECK: ret void
void test_vst1_u8_x4(uint8_t *a, uint8x8x4_t b) {
  vst1_u8_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1_u16_x4(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]])
// CHECK: ret void
void test_vst1_u16_x4(uint16_t *a, uint16x4x4_t b) {
  vst1_u16_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1_u32_x4(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK: call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i32* [[TMP15]])
// CHECK: ret void
void test_vst1_u32_x4(uint32_t *a, uint32x2x4_t b) {
  vst1_u32_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1_u64_x4(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK: call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64* [[TMP15]])
// CHECK: ret void
void test_vst1_u64_x4(uint64_t *a, uint64x1x4_t b) {
  vst1_u64_x4(a, b);
}
// CHECK-LABEL: define void @test_vst1_s8_x4(i8* %a, [4 x <8 x i8>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
// CHECK: ret void
void test_vst1_s8_x4(int8_t *a, int8x8x4_t b) {
  vst1_s8_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1_s16_x4(i16* %a, [4 x <4 x i16>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16*
// CHECK: call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]])
// CHECK: ret void
void test_vst1_s16_x4(int16_t *a, int16x4x4_t b) {
  vst1_s16_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1_s32_x4(i32* %a, [4 x <2 x i32>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i32*
// CHECK: call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i32* [[TMP15]])
// CHECK: ret void
void test_vst1_s32_x4(int32_t *a, int32x2x4_t b) {
  vst1_s32_x4(a, b);
}

// CHECK-LABEL: define void @test_vst1_s64_x4(i64* %a, [4 x <1 x i64>] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
// CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64*
// CHECK: call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64* [[TMP15]])
// CHECK: ret void
void test_vst1_s64_x4(int64_t *a, int64x1x4_t b) {
  vst1_s64_x4(a, b);
}
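
// As with the Q-register form, <4 x half> is stored via <4 x i16>.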
// CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8> 17740 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 17741 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 17742 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 17743 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> 17744 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16* 17745 // CHECK: call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]]) 17746 // CHECK: ret void 17747 void test_vst1_f16_x4(float16_t *a, float16x4x4_t b) { 17748 vst1_f16_x4(a, b); 17749 } 17750 17751 // CHECK-LABEL: define void @test_vst1_f32_x4(float* %a, [4 x <2 x float>] %b.coerce) #0 { 17752 // CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8 17753 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8 17754 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0 17755 // CHECK: store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8 17756 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8* 17757 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8* 17758 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 17759 // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* 17760 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 17761 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i64 0, i64 0 17762 // CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 17763 // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8> 17764 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 17765 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i64 0, i64 1 17766 // CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 17767 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8> 17768 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 17769 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i64 0, i64 2 17770 // CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8 17771 // CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8> 17772 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 17773 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i64 0, i64 3 17774 // CHECK: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8 17775 // CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8> 17776 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float> 17777 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> 17778 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float> 17779 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float> 17780 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to float* 17781 // CHECK: call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> 
[[TMP13]], <2 x float> [[TMP14]], float* [[TMP15]]) 17782 // CHECK: ret void 17783 void test_vst1_f32_x4(float32_t *a, float32x2x4_t b) { 17784 vst1_f32_x4(a, b); 17785 } 17786 17787 // CHECK-LABEL: define void @test_vst1_f64_x4(double* %a, [4 x <1 x double>] %b.coerce) #0 { 17788 // CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8 17789 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8 17790 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0 17791 // CHECK: store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8 17792 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8* 17793 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8* 17794 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 17795 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* 17796 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 17797 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0 17798 // CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8 17799 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> 17800 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 17801 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL1]], i64 0, i64 1 17802 // CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8 17803 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> 17804 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 17805 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL3]], i64 0, i64 2 17806 // CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8 17807 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8> 17808 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 17809 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL5]], i64 0, i64 3 17810 // CHECK: [[TMP9:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX6]], align 8 17811 // CHECK: [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8> 17812 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> 17813 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> 17814 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double> 17815 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double> 17816 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to double* 17817 // CHECK: call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], double* [[TMP15]]) 17818 // CHECK: ret void 17819 void test_vst1_f64_x4(float64_t *a, float64x1x4_t b) { 17820 vst1_f64_x4(a, b); 17821 } 17822 17823 // CHECK-LABEL: define void @test_vst1_p8_x4(i8* %a, [4 x <8 x i8>] %b.coerce) #0 { 17824 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8 17825 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8 17826 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, 
%struct.poly8x8x4_t* [[B]], i32 0, i32 0 17827 // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 17828 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8* 17829 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8* 17830 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 17831 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 17832 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 17833 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 17834 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 17835 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1 17836 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 17837 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 17838 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2 17839 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 17840 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 17841 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3 17842 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 17843 // CHECK: call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a) 17844 // CHECK: ret void 17845 void test_vst1_p8_x4(poly8_t *a, poly8x8x4_t b) { 17846 vst1_p8_x4(a, b); 17847 } 17848 17849 // CHECK-LABEL: define void @test_vst1_p16_x4(i16* %a, [4 x <4 x i16>] %b.coerce) #0 { 17850 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8 17851 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8 17852 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0 17853 // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 17854 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8* 17855 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8* 17856 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 17857 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 17858 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 17859 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0 17860 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 17861 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 17862 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 17863 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1 17864 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 17865 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 17866 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 17867 // 
CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2 17868 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 17869 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 17870 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 17871 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3 17872 // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 17873 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8> 17874 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 17875 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 17876 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 17877 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> 17878 // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16* 17879 // CHECK: call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]]) 17880 // CHECK: ret void 17881 void test_vst1_p16_x4(poly16_t *a, poly16x4x4_t b) { 17882 vst1_p16_x4(a, b); 17883 } 17884 17885 // CHECK-LABEL: define void @test_vst1_p64_x4(i64* %a, [4 x <1 x i64>] %b.coerce) #0 { 17886 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8 17887 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8 17888 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[B]], i32 0, i32 0 17889 // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 17890 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8* 17891 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x4_t* [[B]] to i8* 17892 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 17893 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 17894 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0 17895 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0 17896 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 17897 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 17898 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0 17899 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1 17900 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 17901 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 17902 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0 17903 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2 17904 // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 17905 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> 17906 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0 17907 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3 17908 // CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8 17909 // CHECK: [[TMP10:%.*]] = bitcast 
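
// Illustrative usage (intentionally carries no CHECK lines, so it does not
// affect the FileCheck run): the vst1_<type>_x4 family stores four 64-bit
// registers to consecutive memory (32 contiguous bytes), without the
// per-element interleaving performed by vst4. A minimal sketch; the helper
// name is ours:
void example_st1_x4_contiguous(int32_t *dst, int32x2_t v0, int32x2_t v1,
                               int32x2_t v2, int32x2_t v3) {
  int32x2x4_t q;
  q.val[0] = v0; // written to dst[0], dst[1]
  q.val[1] = v1; // written to dst[2], dst[3]
  q.val[2] = v2; // written to dst[4], dst[5]
  q.val[3] = v3; // written to dst[6], dst[7]
  vst1_s32_x4(dst, q);
}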
// CHECK-LABEL: define i64 @test_vceqd_s64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = icmp eq i64 %a, %b
// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQD_I]]
int64_t test_vceqd_s64(int64_t a, int64_t b) {
  return (int64_t)vceqd_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vceqd_u64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = icmp eq i64 %a, %b
// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQD_I]]
uint64_t test_vceqd_u64(uint64_t a, uint64_t b) {
  return (int64_t)vceqd_u64(a, b);
}

// CHECK-LABEL: define i64 @test_vceqzd_s64(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = icmp eq i64 %a, 0
// CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQZ_I]]
int64_t test_vceqzd_s64(int64_t a) {
  return (int64_t)vceqzd_s64(a);
}

// CHECK-LABEL: define i64 @test_vceqzd_u64(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = icmp eq i64 %a, 0
// CHECK: [[VCEQZD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQZD_I]]
int64_t test_vceqzd_u64(int64_t a) {
  return (int64_t)vceqzd_u64(a);
}

// CHECK-LABEL: define i64 @test_vcged_s64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = icmp sge i64 %a, %b
// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQD_I]]
int64_t test_vcged_s64(int64_t a, int64_t b) {
  return (int64_t)vcged_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vcged_u64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = icmp uge i64 %a, %b
// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQD_I]]
uint64_t test_vcged_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcged_u64(a, b);
}

// CHECK-LABEL: define i64 @test_vcgezd_s64(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = icmp sge i64 %a, 0
// CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCGEZ_I]]
int64_t test_vcgezd_s64(int64_t a) {
  return (int64_t)vcgezd_s64(a);
}

// CHECK-LABEL: define i64 @test_vcgtd_s64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = icmp sgt i64 %a, %b
// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQD_I]]
int64_t test_vcgtd_s64(int64_t a, int64_t b) {
  return (int64_t)vcgtd_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vcgtd_u64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = icmp ugt i64 %a, %b
// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQD_I]]
uint64_t test_vcgtd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcgtd_u64(a, b);
}

// CHECK-LABEL: define i64 @test_vcgtzd_s64(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = icmp sgt i64 %a, 0
// CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCGTZ_I]]
int64_t test_vcgtzd_s64(int64_t a) {
  return (int64_t)vcgtzd_s64(a);
}

// CHECK-LABEL: define i64 @test_vcled_s64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = icmp sle i64 %a, %b
// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQD_I]]
int64_t test_vcled_s64(int64_t a, int64_t b) {
  return (int64_t)vcled_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vcled_u64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = icmp ule i64 %a, %b
// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQD_I]]
uint64_t test_vcled_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcled_u64(a, b);
}

// CHECK-LABEL: define i64 @test_vclezd_s64(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = icmp sle i64 %a, 0
// CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCLEZ_I]]
int64_t test_vclezd_s64(int64_t a) {
  return (int64_t)vclezd_s64(a);
}

// CHECK-LABEL: define i64 @test_vcltd_s64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = icmp slt i64 %a, %b
// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQD_I]]
int64_t test_vcltd_s64(int64_t a, int64_t b) {
  return (int64_t)vcltd_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vcltd_u64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = icmp ult i64 %a, %b
// CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQD_I]]
uint64_t test_vcltd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcltd_u64(a, b);
}

// CHECK-LABEL: define i64 @test_vcltzd_s64(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = icmp slt i64 %a, 0
// CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCLTZ_I]]
int64_t test_vcltzd_s64(int64_t a) {
  return (int64_t)vcltzd_s64(a);
}

// CHECK-LABEL: define i64 @test_vtstd_s64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = and i64 %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
// CHECK: [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
// CHECK: ret i64 [[VTSTD_I]]
int64_t test_vtstd_s64(int64_t a, int64_t b) {
  return (int64_t)vtstd_s64(a, b);
}

// CHECK-LABEL: define i64 @test_vtstd_u64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = and i64 %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
// CHECK: [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
// CHECK: ret i64 [[VTSTD_I]]
uint64_t test_vtstd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vtstd_u64(a, b);
}
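
// Illustrative usage (no CHECK lines on purpose): the scalar compares return
// an all-ones mask (-1) on success and 0 on failure, as the sext-of-icmp
// expansions above show, so they compose with bitwise operations for
// branchless selection. A sketch; the helper name is ours:
uint64_t example_branchless_max_s64(int64_t a, int64_t b) {
  uint64_t m = vcged_s64(a, b); // all-ones if a >= b, else 0
  return (m & (uint64_t)a) | (~m & (uint64_t)b);
}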
// CHECK-LABEL: define i64 @test_vabsd_s64(i64 %a) #0 {
// CHECK: [[VABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.abs.i64(i64 %a) #4
// CHECK: ret i64 [[VABSD_S64_I]]
int64_t test_vabsd_s64(int64_t a) {
  return (int64_t)vabsd_s64(a);
}

// CHECK-LABEL: define i8 @test_vqabsb_s8(i8 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[VQABSB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> [[TMP0]]) #4
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQABSB_S8_I]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqabsb_s8(int8_t a) {
  return (int8_t)vqabsb_s8(a);
}

// CHECK-LABEL: define i16 @test_vqabsh_s16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[VQABSH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> [[TMP0]]) #4
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQABSH_S16_I]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqabsh_s16(int16_t a) {
  return (int16_t)vqabsh_s16(a);
}

// CHECK-LABEL: define i32 @test_vqabss_s32(i32 %a) #0 {
// CHECK: [[VQABSS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a) #4
// CHECK: ret i32 [[VQABSS_S32_I]]
int32_t test_vqabss_s32(int32_t a) {
  return (int32_t)vqabss_s32(a);
}

// CHECK-LABEL: define i64 @test_vqabsd_s64(i64 %a) #0 {
// CHECK: [[VQABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqabs.i64(i64 %a) #4
// CHECK: ret i64 [[VQABSD_S64_I]]
int64_t test_vqabsd_s64(int64_t a) {
  return (int64_t)vqabsd_s64(a);
}

// CHECK-LABEL: define i64 @test_vnegd_s64(i64 %a) #0 {
// CHECK: [[VNEGD_I:%.*]] = sub i64 0, %a
// CHECK: ret i64 [[VNEGD_I]]
int64_t test_vnegd_s64(int64_t a) {
  return (int64_t)vnegd_s64(a);
}

// CHECK-LABEL: define i8 @test_vqnegb_s8(i8 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[VQNEGB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> [[TMP0]]) #4
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQNEGB_S8_I]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqnegb_s8(int8_t a) {
  return (int8_t)vqnegb_s8(a);
}

// CHECK-LABEL: define i16 @test_vqnegh_s16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[VQNEGH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> [[TMP0]]) #4
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQNEGH_S16_I]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqnegh_s16(int16_t a) {
  return (int16_t)vqnegh_s16(a);
}

// CHECK-LABEL: define i32 @test_vqnegs_s32(i32 %a) #0 {
// CHECK: [[VQNEGS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqneg.i32(i32 %a) #4
// CHECK: ret i32 [[VQNEGS_S32_I]]
int32_t test_vqnegs_s32(int32_t a) {
  return (int32_t)vqnegs_s32(a);
}

// CHECK-LABEL: define i64 @test_vqnegd_s64(i64 %a) #0 {
// CHECK: [[VQNEGD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqneg.i64(i64 %a) #4
// CHECK: ret i64 [[VQNEGD_S64_I]]
int64_t test_vqnegd_s64(int64_t a) {
  return (int64_t)vqnegd_s64(a);
}

// CHECK-LABEL: define i8 @test_vuqaddb_s8(i8 %a, i8 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VUQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VUQADDB_S8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
int8_t test_vuqaddb_s8(int8_t a, int8_t b) {
  return (int8_t)vuqaddb_s8(a, b);
}

// CHECK-LABEL: define i16 @test_vuqaddh_s16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VUQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VUQADDH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vuqaddh_s16(int16_t a, int16_t b) {
  return (int16_t)vuqaddh_s16(a, b);
}

// CHECK-LABEL: define i32 @test_vuqadds_s32(i32 %a, i32 %b) #0 {
// CHECK: [[VUQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VUQADDS_S32_I]]
int32_t test_vuqadds_s32(int32_t a, int32_t b) {
  return (int32_t)vuqadds_s32(a, b);
}

// CHECK-LABEL: define i64 @test_vuqaddd_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VUQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VUQADDD_S64_I]]
int64_t test_vuqaddd_s64(int64_t a, int64_t b) {
  return (int64_t)vuqaddd_s64(a, b);
}

// CHECK-LABEL: define i8 @test_vsqaddb_u8(i8 %a, i8 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 %b, i64 0
// CHECK: [[VSQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VSQADDB_U8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
uint8_t test_vsqaddb_u8(uint8_t a, uint8_t b) {
  return (uint8_t)vsqaddb_u8(a, b);
}

// CHECK-LABEL: define i16 @test_vsqaddh_u16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VSQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VSQADDH_U16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
uint16_t test_vsqaddh_u16(uint16_t a, uint16_t b) {
  return (uint16_t)vsqaddh_u16(a, b);
}

// CHECK-LABEL: define i32 @test_vsqadds_u32(i32 %a, i32 %b) #0 {
// CHECK: [[VSQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %a, i32 %b) #4
// CHECK: ret i32 [[VSQADDS_U32_I]]
uint32_t test_vsqadds_u32(uint32_t a, uint32_t b) {
  return (uint32_t)vsqadds_u32(a, b);
}

// CHECK-LABEL: define i64 @test_vsqaddd_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VSQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %a, i64 %b) #4
// CHECK: ret i64 [[VSQADDD_U64_I]]
uint64_t test_vsqaddd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vsqaddd_u64(a, b);
}
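
// Illustrative usage (unchecked): vuqadd* adds an unsigned operand to a
// signed accumulator with signed saturation; vsqadd* is the converse for an
// unsigned accumulator. A sketch only; the helper name is ours:
int8_t example_mixed_sign_sat_add(int8_t acc, uint8_t inc) {
  // e.g. acc = 100, inc = 200 clamps to 127 instead of wrapping.
  return vuqaddb_s8(acc, inc);
}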
// CHECK-LABEL: define i32 @test_vqdmlalh_s16(i32 %a, i16 %b, i16 %c) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %c, i64 0
// CHECK: [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0
// CHECK: [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0_I]]) #4
// CHECK: ret i32 [[VQDMLXL1_I]]
int32_t test_vqdmlalh_s16(int32_t a, int16_t b, int16_t c) {
  return (int32_t)vqdmlalh_s16(a, b, c);
}

// CHECK-LABEL: define i64 @test_vqdmlals_s32(i64 %a, i32 %b, i32 %c) #0 {
// CHECK: [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c) #4
// CHECK: [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL_I]]) #4
// CHECK: ret i64 [[VQDMLXL1_I]]
int64_t test_vqdmlals_s32(int64_t a, int32_t b, int32_t c) {
  return (int64_t)vqdmlals_s32(a, b, c);
}

// CHECK-LABEL: define i32 @test_vqdmlslh_s16(i32 %a, i16 %b, i16 %c) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %c, i64 0
// CHECK: [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0
// CHECK: [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0_I]]) #4
// CHECK: ret i32 [[VQDMLXL1_I]]
int32_t test_vqdmlslh_s16(int32_t a, int16_t b, int16_t c) {
  return (int32_t)vqdmlslh_s16(a, b, c);
}

// CHECK-LABEL: define i64 @test_vqdmlsls_s32(i64 %a, i32 %b, i32 %c) #0 {
// CHECK: [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c) #4
// CHECK: [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL_I]]) #4
// CHECK: ret i64 [[VQDMLXL1_I]]
int64_t test_vqdmlsls_s32(int64_t a, int32_t b, int32_t c) {
  return (int64_t)vqdmlsls_s32(a, b, c);
}

// CHECK-LABEL: define i32 @test_vqdmullh_s16(i16 %a, i16 %b) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) #4
// CHECK: [[TMP2:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0
// CHECK: ret i32 [[TMP2]]
int32_t test_vqdmullh_s16(int16_t a, int16_t b) {
  return (int32_t)vqdmullh_s16(a, b);
}

// CHECK-LABEL: define i64 @test_vqdmulls_s32(i32 %a, i32 %b) #0 {
// CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 %b) #4
// CHECK: ret i64 [[VQDMULLS_S32_I]]
int64_t test_vqdmulls_s32(int32_t a, int32_t b) {
  return (int64_t)vqdmulls_s32(a, b);
}
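
// Illustrative usage (unchecked): vqdmlalh_s16 computes roughly
// a + saturate(2 * b * c), widening the 16-bit product into the 32-bit
// accumulator, matching the sqdmull + sqadd expansion above. Sketch; the
// helper name is ours:
int32_t example_qdmlal_step(int32_t acc, int16_t b, int16_t c) {
  // With b = c = 0x4000, this adds 0x20000000 to acc (saturating).
  return vqdmlalh_s16(acc, b, c);
}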
// CHECK-LABEL: define i8 @test_vqmovunh_s16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQMOVUNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[TMP0]]) #4
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVUNH_S16_I]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqmovunh_s16(int16_t a) {
  return (int8_t)vqmovunh_s16(a);
}

// CHECK-LABEL: define i16 @test_vqmovuns_s32(i32 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQMOVUNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[TMP0]]) #4
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVUNS_S32_I]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqmovuns_s32(int32_t a) {
  return (int16_t)vqmovuns_s32(a);
}

// CHECK-LABEL: define i32 @test_vqmovund_s64(i64 %a) #0 {
// CHECK: [[VQMOVUND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %a) #4
// CHECK: ret i32 [[VQMOVUND_S64_I]]
int32_t test_vqmovund_s64(int64_t a) {
  return (int32_t)vqmovund_s64(a);
}

// CHECK-LABEL: define i8 @test_vqmovnh_s16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQMOVNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[TMP0]]) #4
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_S16_I]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqmovnh_s16(int16_t a) {
  return (int8_t)vqmovnh_s16(a);
}

// CHECK-LABEL: define i16 @test_vqmovns_s32(i32 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQMOVNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[TMP0]]) #4
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_S32_I]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqmovns_s32(int32_t a) {
  return (int16_t)vqmovns_s32(a);
}

// CHECK-LABEL: define i32 @test_vqmovnd_s64(i64 %a) #0 {
// CHECK: [[VQMOVND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %a) #4
// CHECK: ret i32 [[VQMOVND_S64_I]]
int32_t test_vqmovnd_s64(int64_t a) {
  return (int32_t)vqmovnd_s64(a);
}

// CHECK-LABEL: define i8 @test_vqmovnh_u16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQMOVNH_U16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[TMP0]]) #4
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_U16_I]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqmovnh_u16(int16_t a) {
  return (int8_t)vqmovnh_u16(a);
}

// CHECK-LABEL: define i16 @test_vqmovns_u32(i32 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQMOVNS_U32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[TMP0]]) #4
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_U32_I]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqmovns_u32(int32_t a) {
  return (int16_t)vqmovns_u32(a);
}

// CHECK-LABEL: define i32 @test_vqmovnd_u64(i64 %a) #0 {
// CHECK: [[VQMOVND_U64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %a) #4
// CHECK: ret i32 [[VQMOVND_U64_I]]
int32_t test_vqmovnd_u64(int64_t a) {
  return (int32_t)vqmovnd_u64(a);
}
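
// Illustrative usage (unchecked): the vqmovn*/vqmovun* scalars narrow with
// saturation, so out-of-range inputs clamp rather than truncate. Sketch;
// the helper name is ours:
int8_t example_narrow_sat(int16_t x) {
  return vqmovnh_s16(x); // x = 1000 yields 127; x = -1000 yields -128
}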
// CHECK-LABEL: define i32 @test_vceqs_f32(float %a, float %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp oeq float %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCMPD_I]]
uint32_t test_vceqs_f32(float32_t a, float32_t b) {
  return (uint32_t)vceqs_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vceqd_f64(double %a, double %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp oeq double %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCMPD_I]]
uint64_t test_vceqd_f64(float64_t a, float64_t b) {
  return (uint64_t)vceqd_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vceqzs_f32(float %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp oeq float %a, 0.000000e+00
// CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCEQZ_I]]
uint32_t test_vceqzs_f32(float32_t a) {
  return (uint32_t)vceqzs_f32(a);
}

// CHECK-LABEL: define i64 @test_vceqzd_f64(double %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp oeq double %a, 0.000000e+00
// CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQZ_I]]
uint64_t test_vceqzd_f64(float64_t a) {
  return (uint64_t)vceqzd_f64(a);
}

// CHECK-LABEL: define i32 @test_vcges_f32(float %a, float %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp oge float %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCMPD_I]]
uint32_t test_vcges_f32(float32_t a, float32_t b) {
  return (uint32_t)vcges_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcged_f64(double %a, double %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp oge double %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCMPD_I]]
uint64_t test_vcged_f64(float64_t a, float64_t b) {
  return (uint64_t)vcged_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vcgezs_f32(float %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp oge float %a, 0.000000e+00
// CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCGEZ_I]]
uint32_t test_vcgezs_f32(float32_t a) {
  return (uint32_t)vcgezs_f32(a);
}

// CHECK-LABEL: define i64 @test_vcgezd_f64(double %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp oge double %a, 0.000000e+00
// CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCGEZ_I]]
uint64_t test_vcgezd_f64(float64_t a) {
  return (uint64_t)vcgezd_f64(a);
}

// CHECK-LABEL: define i32 @test_vcgts_f32(float %a, float %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp ogt float %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCMPD_I]]
uint32_t test_vcgts_f32(float32_t a, float32_t b) {
  return (uint32_t)vcgts_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcgtd_f64(double %a, double %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp ogt double %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCMPD_I]]
uint64_t test_vcgtd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcgtd_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vcgtzs_f32(float %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp ogt float %a, 0.000000e+00
// CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCGTZ_I]]
uint32_t test_vcgtzs_f32(float32_t a) {
  return (uint32_t)vcgtzs_f32(a);
}

// CHECK-LABEL: define i64 @test_vcgtzd_f64(double %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp ogt double %a, 0.000000e+00
// CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCGTZ_I]]
uint64_t test_vcgtzd_f64(float64_t a) {
  return (uint64_t)vcgtzd_f64(a);
}

// CHECK-LABEL: define i32 @test_vcles_f32(float %a, float %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp ole float %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCMPD_I]]
uint32_t test_vcles_f32(float32_t a, float32_t b) {
  return (uint32_t)vcles_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcled_f64(double %a, double %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp ole double %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCMPD_I]]
uint64_t test_vcled_f64(float64_t a, float64_t b) {
  return (uint64_t)vcled_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vclezs_f32(float %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp ole float %a, 0.000000e+00
// CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCLEZ_I]]
uint32_t test_vclezs_f32(float32_t a) {
  return (uint32_t)vclezs_f32(a);
}

// CHECK-LABEL: define i64 @test_vclezd_f64(double %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp ole double %a, 0.000000e+00
// CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCLEZ_I]]
uint64_t test_vclezd_f64(float64_t a) {
  return (uint64_t)vclezd_f64(a);
}

// CHECK-LABEL: define i32 @test_vclts_f32(float %a, float %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp olt float %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCMPD_I]]
uint32_t test_vclts_f32(float32_t a, float32_t b) {
  return (uint32_t)vclts_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcltd_f64(double %a, double %b) #0 {
// CHECK: [[TMP0:%.*]] = fcmp olt double %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCMPD_I]]
uint64_t test_vcltd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcltd_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vcltzs_f32(float %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp olt float %a, 0.000000e+00
// CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCLTZ_I]]
uint32_t test_vcltzs_f32(float32_t a) {
  return (uint32_t)vcltzs_f32(a);
}

// CHECK-LABEL: define i64 @test_vcltzd_f64(double %a) #0 {
// CHECK: [[TMP0:%.*]] = fcmp olt double %a, 0.000000e+00
// CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCLTZ_I]]
uint64_t test_vcltzd_f64(float64_t a) {
  return (uint64_t)vcltzd_f64(a);
}
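
// Illustrative note (unchecked): the scalar FP compares lower to ordered
// fcmp predicates (oeq, oge, ogt, ole, olt above), so any NaN operand makes
// the result 0 (false). Sketch; the helper name is ours:
uint32_t example_fp_ge_mask(float32_t a, float32_t b) {
  return vcges_f32(a, b); // all-ones only if a >= b and neither input is NaN
}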
// CHECK-LABEL: define i32 @test_vcages_f32(float %a, float %b) #0 {
// CHECK: [[VCAGES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %a, float %b) #4
// CHECK: ret i32 [[VCAGES_F32_I]]
uint32_t test_vcages_f32(float32_t a, float32_t b) {
  return (uint32_t)vcages_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcaged_f64(double %a, double %b) #0 {
// CHECK: [[VCAGED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %a, double %b) #4
// CHECK: ret i64 [[VCAGED_F64_I]]
uint64_t test_vcaged_f64(float64_t a, float64_t b) {
  return (uint64_t)vcaged_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vcagts_f32(float %a, float %b) #0 {
// CHECK: [[VCAGTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %a, float %b) #4
// CHECK: ret i32 [[VCAGTS_F32_I]]
uint32_t test_vcagts_f32(float32_t a, float32_t b) {
  return (uint32_t)vcagts_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcagtd_f64(double %a, double %b) #0 {
// CHECK: [[VCAGTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %a, double %b) #4
// CHECK: ret i64 [[VCAGTD_F64_I]]
uint64_t test_vcagtd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcagtd_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vcales_f32(float %a, float %b) #0 {
// CHECK: [[VCALES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %b, float %a) #4
// CHECK: ret i32 [[VCALES_F32_I]]
uint32_t test_vcales_f32(float32_t a, float32_t b) {
  return (uint32_t)vcales_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcaled_f64(double %a, double %b) #0 {
// CHECK: [[VCALED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %b, double %a) #4
// CHECK: ret i64 [[VCALED_F64_I]]
uint64_t test_vcaled_f64(float64_t a, float64_t b) {
  return (uint64_t)vcaled_f64(a, b);
}

// CHECK-LABEL: define i32 @test_vcalts_f32(float %a, float %b) #0 {
// CHECK: [[VCALTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %b, float %a) #4
// CHECK: ret i32 [[VCALTS_F32_I]]
uint32_t test_vcalts_f32(float32_t a, float32_t b) {
  return (uint32_t)vcalts_f32(a, b);
}

// CHECK-LABEL: define i64 @test_vcaltd_f64(double %a, double %b) #0 {
// CHECK: [[VCALTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %b, double %a) #4
// CHECK: ret i64 [[VCALTD_F64_I]]
uint64_t test_vcaltd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcaltd_f64(a, b);
}

// CHECK-LABEL: define i64 @test_vshrd_n_s64(i64 %a) #0 {
// CHECK: [[SHRD_N:%.*]] = ashr i64 %a, 1
// CHECK: ret i64 [[SHRD_N]]
int64_t test_vshrd_n_s64(int64_t a) {
  return (int64_t)vshrd_n_s64(a, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vshr_n_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHR_N]]
int64x1_t test_vshr_n_s64(int64x1_t a) {
  return vshr_n_s64(a, 1);
}

// CHECK-LABEL: define i64 @test_vshrd_n_u64(i64 %a) #0 {
// CHECK: ret i64 0
uint64_t test_vshrd_n_u64(uint64_t a) {
  return (uint64_t)vshrd_n_u64(a, 64);
}

// CHECK-LABEL: define i64 @test_vshrd_n_u64_2() #0 {
// CHECK: ret i64 0
uint64_t test_vshrd_n_u64_2() {
  uint64_t a = UINT64_C(0xf000000000000000);
  return vshrd_n_u64(a, 64);
}

// CHECK-LABEL: define <1 x i64> @test_vshr_n_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHR_N]]
uint64x1_t test_vshr_n_u64(uint64x1_t a) {
  return vshr_n_u64(a, 1);
}
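
// Illustrative note (unchecked): an unsigned right shift by the full element
// width is well defined for these intrinsics and folds to 0, matching the
// `ret i64 0` checks above. Sketch; the helper name is ours:
uint64_t example_full_width_shift(uint64_t a) {
  return vshrd_n_u64(a, 64); // always 0, whatever a holds
}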
// CHECK-LABEL: define i64 @test_vrshrd_n_s64(i64 %a) #0 {
// CHECK: [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 -63)
// CHECK: ret i64 [[VRSHR_N]]
int64_t test_vrshrd_n_s64(int64_t a) {
  return (int64_t)vrshrd_n_s64(a, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vrshr_n_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK: ret <1 x i64> [[VRSHR_N1]]
int64x1_t test_vrshr_n_s64(int64x1_t a) {
  return vrshr_n_s64(a, 1);
}

// CHECK-LABEL: define i64 @test_vrshrd_n_u64(i64 %a) #0 {
// CHECK: [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 -63)
// CHECK: ret i64 [[VRSHR_N]]
uint64_t test_vrshrd_n_u64(uint64_t a) {
  return (uint64_t)vrshrd_n_u64(a, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vrshr_n_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK: ret <1 x i64> [[VRSHR_N1]]
uint64x1_t test_vrshr_n_u64(uint64x1_t a) {
  return vrshr_n_u64(a, 1);
}

// CHECK-LABEL: define i64 @test_vsrad_n_s64(i64 %a, i64 %b) #0 {
// CHECK: [[SHRD_N:%.*]] = ashr i64 %b, 63
// CHECK: [[TMP0:%.*]] = add i64 %a, [[SHRD_N]]
// CHECK: ret i64 [[TMP0]]
int64_t test_vsrad_n_s64(int64_t a, int64_t b) {
  return (int64_t)vsrad_n_s64(a, b, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vsra_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], <i64 1>
// CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <1 x i64> [[TMP4]]
int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) {
  return vsra_n_s64(a, b, 1);
}

// CHECK-LABEL: define i64 @test_vsrad_n_u64(i64 %a, i64 %b) #0 {
// CHECK: [[SHRD_N:%.*]] = lshr i64 %b, 63
// CHECK: [[TMP0:%.*]] = add i64 %a, [[SHRD_N]]
// CHECK: ret i64 [[TMP0]]
uint64_t test_vsrad_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vsrad_n_u64(a, b, 63);
}

// CHECK-LABEL: define i64 @test_vsrad_n_u64_2(i64 %a, i64 %b) #0 {
// CHECK: ret i64 %a
uint64_t test_vsrad_n_u64_2(uint64_t a, uint64_t b) {
  return (uint64_t)vsrad_n_u64(a, b, 64);
}

// CHECK-LABEL: define <1 x i64> @test_vsra_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], <i64 1>
// CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <1 x i64> [[TMP4]]
uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsra_n_u64(a, b, 1);
}

// CHECK-LABEL: define i64 @test_vrsrad_n_s64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %b, i64 -63)
// CHECK: [[TMP1:%.*]] = add i64 %a, [[TMP0]]
// CHECK: ret i64 [[TMP1]]
int64_t test_vrsrad_n_s64(int64_t a, int64_t b) {
  return (int64_t)vrsrad_n_s64(a, b, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vrsra_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
// CHECK: ret <1 x i64> [[TMP3]]
int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) {
  return vrsra_n_s64(a, b, 1);
}

// CHECK-LABEL: define i64 @test_vrsrad_n_u64(i64 %a, i64 %b) #0 {
// CHECK: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %b, i64 -63)
// CHECK: [[TMP1:%.*]] = add i64 %a, [[TMP0]]
// CHECK: ret i64 [[TMP1]]
uint64_t test_vrsrad_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vrsrad_n_u64(a, b, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vrsra_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
// CHECK: ret <1 x i64> [[TMP3]]
uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) {
  return vrsra_n_u64(a, b, 1);
}
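
// Illustrative note (unchecked): the rounding right shifts are emitted as
// srshl/urshl with a negated shift amount, which adds the rounding constant
// before shifting. Sketch; the helper name is ours:
uint64_t example_rounding_halve(uint64_t a) {
  return vrshrd_n_u64(a, 1); // computes (a + 1) >> 1
}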
// CHECK-LABEL: define i64 @test_vshld_n_s64(i64 %a) #0 {
// CHECK: [[SHLD_N:%.*]] = shl i64 %a, 1
// CHECK: ret i64 [[SHLD_N]]
int64_t test_vshld_n_s64(int64_t a) {
  return (int64_t)vshld_n_s64(a, 1);
}
// CHECK-LABEL: define <1 x i64> @test_vshl_n_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHL_N]]
int64x1_t test_vshl_n_s64(int64x1_t a) {
  return vshl_n_s64(a, 1);
}

// CHECK-LABEL: define i64 @test_vshld_n_u64(i64 %a) #0 {
// CHECK: [[SHLD_N:%.*]] = shl i64 %a, 63
// CHECK: ret i64 [[SHLD_N]]
uint64_t test_vshld_n_u64(uint64_t a) {
  return (uint64_t)vshld_n_u64(a, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vshl_n_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHL_N]]
uint64x1_t test_vshl_n_u64(uint64x1_t a) {
  return vshl_n_u64(a, 1);
}

// CHECK-LABEL: define i8 @test_vqshlb_n_s8(i8 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[VQSHLB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_S8]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqshlb_n_s8(int8_t a) {
  return (int8_t)vqshlb_n_s8(a, 7);
}

// CHECK-LABEL: define i16 @test_vqshlh_n_s16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[VQSHLH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_S16]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqshlh_n_s16(int16_t a) {
  return (int16_t)vqshlh_n_s16(a, 15);
}

// CHECK-LABEL: define i32 @test_vqshls_n_s32(i32 %a) #0 {
// CHECK: [[VQSHLS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 31)
// CHECK: ret i32 [[VQSHLS_N_S32]]
int32_t test_vqshls_n_s32(int32_t a) {
  return (int32_t)vqshls_n_s32(a, 31);
}

// CHECK-LABEL: define i64 @test_vqshld_n_s64(i64 %a) #0 {
// CHECK: [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 63)
// CHECK: ret i64 [[VQSHL_N]]
int64_t test_vqshld_n_s64(int64_t a) {
  return (int64_t)vqshld_n_s64(a, 63);
}

// CHECK-LABEL: define <8 x i8> @test_vqshl_n_s8(<8 x i8> %a) #0 {
// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
// CHECK: ret <8 x i8> [[VQSHL_N]]
int8x8_t test_vqshl_n_s8(int8x8_t a) {
  return vqshl_n_s8(a, 0);
}

// CHECK-LABEL: define <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) #0 {
// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
// CHECK: ret <16 x i8> [[VQSHL_N]]
int8x16_t test_vqshlq_n_s8(int8x16_t a) {
  return vqshlq_n_s8(a, 0);
}

// CHECK-LABEL: define <4 x i16> @test_vqshl_n_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
// CHECK: ret <4 x i16> [[VQSHL_N1]]
int16x4_t test_vqshl_n_s16(int16x4_t a) {
  return vqshl_n_s16(a, 0);
}

// CHECK-LABEL: define <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
// CHECK: ret <8 x i16> [[VQSHL_N1]]
int16x8_t test_vqshlq_n_s16(int16x8_t a) {
  return vqshlq_n_s16(a, 0);
}

// CHECK-LABEL: define <2 x i32> @test_vqshl_n_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
// CHECK: ret <2 x i32> [[VQSHL_N1]]
int32x2_t test_vqshl_n_s32(int32x2_t a) {
  return vqshl_n_s32(a, 0);
}

// CHECK-LABEL: define <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
// CHECK: ret <4 x i32> [[VQSHL_N1]]
int32x4_t test_vqshlq_n_s32(int32x4_t a) {
  return vqshlq_n_s32(a, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vqshlq_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
// CHECK: ret <2 x i64> [[VQSHL_N1]]
int64x2_t test_vqshlq_n_s64(int64x2_t a) {
  return vqshlq_n_s64(a, 0);
}

// CHECK-LABEL: define <8 x i8> @test_vqshl_n_u8(<8 x i8> %a) #0 {
// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
// CHECK: ret <8 x i8> [[VQSHL_N]]
uint8x8_t test_vqshl_n_u8(uint8x8_t a) {
  return vqshl_n_u8(a, 0);
}

// CHECK-LABEL: define <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) #0 {
// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
// CHECK: ret <16 x i8> [[VQSHL_N]]
uint8x16_t test_vqshlq_n_u8(uint8x16_t a) {
  return vqshlq_n_u8(a, 0);
}

// CHECK-LABEL: define <4 x i16> @test_vqshl_n_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
// CHECK: ret <4 x i16> [[VQSHL_N1]]
uint16x4_t test_vqshl_n_u16(uint16x4_t a) {
  return vqshl_n_u16(a, 0);
}

// CHECK-LABEL: define <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
// CHECK: ret <8 x i16> [[VQSHL_N1]]
uint16x8_t test_vqshlq_n_u16(uint16x8_t a) {
  return vqshlq_n_u16(a, 0);
}

// CHECK-LABEL: define <2 x i32> @test_vqshl_n_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
// CHECK: ret <2 x i32> [[VQSHL_N1]]
uint32x2_t test_vqshl_n_u32(uint32x2_t a) {
  return vqshl_n_u32(a, 0);
}

// CHECK-LABEL: define <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
// CHECK: ret <4 x i32> [[VQSHL_N1]]
uint32x4_t test_vqshlq_n_u32(uint32x4_t a) {
  return vqshlq_n_u32(a, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vqshlq_n_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
// CHECK: ret <2 x i64> [[VQSHL_N1]]
uint64x2_t test_vqshlq_n_u64(uint64x2_t a) {
  return vqshlq_n_u64(a, 0);
}
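
// Illustrative usage (unchecked): the vqshl*_n family saturates on overflow
// instead of discarding high bits. Sketch; the helper name is ours:
int8_t example_sat_shift_left(int8_t a) {
  return vqshlb_n_s8(a, 7); // a = 1 saturates to 127 rather than wrapping
}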
// CHECK-LABEL: define i8 @test_vqshlb_n_u8(i8 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[VQSHLB_N_U8:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_U8]], i64 0
// CHECK: ret i8 [[TMP1]]
uint8_t test_vqshlb_n_u8(uint8_t a) {
  return (uint8_t)vqshlb_n_u8(a, 7);
}

// CHECK-LABEL: define i16 @test_vqshlh_n_u16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[VQSHLH_N_U16:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_U16]], i64 0
// CHECK: ret i16 [[TMP1]]
uint16_t test_vqshlh_n_u16(uint16_t a) {
  return (uint16_t)vqshlh_n_u16(a, 15);
}

// CHECK-LABEL: define i32 @test_vqshls_n_u32(i32 %a) #0 {
// CHECK: [[VQSHLS_N_U32:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 31)
// CHECK: ret i32 [[VQSHLS_N_U32]]
uint32_t test_vqshls_n_u32(uint32_t a) {
  return (uint32_t)vqshls_n_u32(a, 31);
}

// CHECK-LABEL: define i64 @test_vqshld_n_u64(i64 %a) #0 {
// CHECK: [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 63)
// CHECK: ret i64 [[VQSHL_N]]
uint64_t test_vqshld_n_u64(uint64_t a) {
  return (uint64_t)vqshld_n_u64(a, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vqshl_n_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VQSHL_N1]]
uint64x1_t test_vqshl_n_u64(uint64x1_t a) {
  return vqshl_n_u64(a, 1);
}
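
// vqshlu*: signed saturating shift left unsigned (SQSHLU). The input is
// signed; the result saturates to the unsigned range.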
// CHECK-LABEL: define i8 @test_vqshlub_n_s8(i8 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> undef, i8 %a, i64 0
// CHECK: [[VQSHLUB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLUB_N_S8]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqshlub_n_s8(int8_t a) {
  return (int8_t)vqshlub_n_s8(a, 7);
}

// CHECK-LABEL: define i16 @test_vqshluh_n_s16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[VQSHLUH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 undef, i16 undef, i16 undef>)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLUH_N_S16]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqshluh_n_s16(int16_t a) {
  return (int16_t)vqshluh_n_s16(a, 15);
}

// CHECK-LABEL: define i32 @test_vqshlus_n_s32(i32 %a) #0 {
// CHECK: [[VQSHLUS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 %a, i32 31)
// CHECK: ret i32 [[VQSHLUS_N_S32]]
int32_t test_vqshlus_n_s32(int32_t a) {
  return (int32_t)vqshlus_n_s32(a, 31);
}

// CHECK-LABEL: define i64 @test_vqshlud_n_s64(i64 %a) #0 {
// CHECK: [[VQSHLU_N:%.*]] = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %a, i64 63)
// CHECK: ret i64 [[VQSHLU_N]]
int64_t test_vqshlud_n_s64(int64_t a) {
  return (int64_t)vqshlud_n_s64(a, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vqshlu_n_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VQSHLU_N1]]
uint64x1_t test_vqshlu_n_s64(int64x1_t a) {
  return vqshlu_n_s64(a, 1);
}
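
// vsri/vsli shift the second operand and insert the surviving bits into
// the first operand. The scalar d forms bitcast i64 to <1 x i64> around
// the v1i64 intrinsic and back.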
// CHECK-LABEL: define i64 @test_vsrid_n_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VSRID_N_S64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[VSRID_N_S641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK: [[VSRID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_S64]], <1 x i64> [[VSRID_N_S641]], i32 63)
// CHECK: [[VSRID_N_S643:%.*]] = bitcast <1 x i64> [[VSRID_N_S642]] to i64
// CHECK: ret i64 [[VSRID_N_S643]]
int64_t test_vsrid_n_s64(int64_t a, int64_t b) {
  return (int64_t)vsrid_n_s64(a, b, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vsri_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
// CHECK: ret <1 x i64> [[VSRI_N2]]
int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) {
  return vsri_n_s64(a, b, 1);
}

// CHECK-LABEL: define i64 @test_vsrid_n_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VSRID_N_U64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[VSRID_N_U641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK: [[VSRID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_U64]], <1 x i64> [[VSRID_N_U641]], i32 63)
// CHECK: [[VSRID_N_U643:%.*]] = bitcast <1 x i64> [[VSRID_N_U642]] to i64
// CHECK: ret i64 [[VSRID_N_U643]]
uint64_t test_vsrid_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vsrid_n_u64(a, b, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vsri_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
// CHECK: ret <1 x i64> [[VSRI_N2]]
uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsri_n_u64(a, b, 1);
}

// CHECK-LABEL: define i64 @test_vslid_n_s64(i64 %a, i64 %b) #0 {
// CHECK: [[VSLID_N_S64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[VSLID_N_S641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK: [[VSLID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_S64]], <1 x i64> [[VSLID_N_S641]], i32 63)
// CHECK: [[VSLID_N_S643:%.*]] = bitcast <1 x i64> [[VSLID_N_S642]] to i64
// CHECK: ret i64 [[VSLID_N_S643]]
int64_t test_vslid_n_s64(int64_t a, int64_t b) {
  return (int64_t)vslid_n_s64(a, b, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vsli_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
// CHECK: ret <1 x i64> [[VSLI_N2]]
int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) {
  return vsli_n_s64(a, b, 1);
}

// CHECK-LABEL: define i64 @test_vslid_n_u64(i64 %a, i64 %b) #0 {
// CHECK: [[VSLID_N_U64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[VSLID_N_U641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK: [[VSLID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_U64]], <1 x i64> [[VSLID_N_U641]], i32 63)
// CHECK: [[VSLID_N_U643:%.*]] = bitcast <1 x i64> [[VSLID_N_U642]] to i64
// CHECK: ret i64 [[VSLID_N_U643]]
uint64_t test_vslid_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vslid_n_u64(a, b, 63);
}

// CHECK-LABEL: define <1 x i64> @test_vsli_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
// CHECK: ret <1 x i64> [[VSLI_N2]]
uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsli_n_u64(a, b, 1);
}
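
// Scalar saturating shift-right-narrow. As with the shifts above, the
// 16- and 32-bit sources go through the insert/shift/extract pattern,
// while the 64-to-32-bit forms use an i32-returning intrinsic.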
// CHECK-LABEL: define i8 @test_vqshrnh_n_s16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_S16]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqshrnh_n_s16(int16_t a) {
  return (int8_t)vqshrnh_n_s16(a, 8);
}

// CHECK-LABEL: define i16 @test_vqshrns_n_s32(i32 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_S32]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqshrns_n_s32(int32_t a) {
  return (int16_t)vqshrns_n_s32(a, 16);
}

// CHECK-LABEL: define i32 @test_vqshrnd_n_s64(i64 %a) #0 {
// CHECK: [[VQSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQSHRND_N_S64]]
int32_t test_vqshrnd_n_s64(int64_t a) {
  return (int32_t)vqshrnd_n_s64(a, 32);
}

// CHECK-LABEL: define i8 @test_vqshrnh_n_u16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_U16]], i64 0
// CHECK: ret i8 [[TMP1]]
uint8_t test_vqshrnh_n_u16(uint16_t a) {
  return (uint8_t)vqshrnh_n_u16(a, 8);
}

// CHECK-LABEL: define i16 @test_vqshrns_n_u32(i32 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_U32]], i64 0
// CHECK: ret i16 [[TMP1]]
uint16_t test_vqshrns_n_u32(uint32_t a) {
  return (uint16_t)vqshrns_n_u32(a, 16);
}

// CHECK-LABEL: define i32 @test_vqshrnd_n_u64(i64 %a) #0 {
// CHECK: [[VQSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQSHRND_N_U64]]
uint32_t test_vqshrnd_n_u64(uint64_t a) {
  return (uint32_t)vqshrnd_n_u64(a, 32);
}

// CHECK-LABEL: define i8 @test_vqrshrnh_n_s16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQRSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_S16]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqrshrnh_n_s16(int16_t a) {
  return (int8_t)vqrshrnh_n_s16(a, 8);
}

// CHECK-LABEL: define i16 @test_vqrshrns_n_s32(i32 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQRSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_S32]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqrshrns_n_s32(int32_t a) {
  return (int16_t)vqrshrns_n_s32(a, 16);
}

// CHECK-LABEL: define i32 @test_vqrshrnd_n_s64(i64 %a) #0 {
// CHECK: [[VQRSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQRSHRND_N_S64]]
int32_t test_vqrshrnd_n_s64(int64_t a) {
  return (int32_t)vqrshrnd_n_s64(a, 32);
}
// CHECK-LABEL: define i8 @test_vqrshrnh_n_u16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQRSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_U16]], i64 0
// CHECK: ret i8 [[TMP1]]
uint8_t test_vqrshrnh_n_u16(uint16_t a) {
  return (uint8_t)vqrshrnh_n_u16(a, 8);
}

// CHECK-LABEL: define i16 @test_vqrshrns_n_u32(i32 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQRSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_U32]], i64 0
// CHECK: ret i16 [[TMP1]]
uint16_t test_vqrshrns_n_u32(uint32_t a) {
  return (uint16_t)vqrshrns_n_u32(a, 16);
}

// CHECK-LABEL: define i32 @test_vqrshrnd_n_u64(i64 %a) #0 {
// CHECK: [[VQRSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQRSHRND_N_U64]]
uint32_t test_vqrshrnd_n_u64(uint64_t a) {
  return (uint32_t)vqrshrnd_n_u64(a, 32);
}

// CHECK-LABEL: define i8 @test_vqshrunh_n_s16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRUNH_N_S16]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqshrunh_n_s16(int16_t a) {
  return (int8_t)vqshrunh_n_s16(a, 8);
}

// CHECK-LABEL: define i16 @test_vqshruns_n_s32(i32 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRUNS_N_S32]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqshruns_n_s32(int32_t a) {
  return (int16_t)vqshruns_n_s32(a, 16);
}

// CHECK-LABEL: define i32 @test_vqshrund_n_s64(i64 %a) #0 {
// CHECK: [[VQSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQSHRUND_N_S64]]
int32_t test_vqshrund_n_s64(int64_t a) {
  return (int32_t)vqshrund_n_s64(a, 32);
}

// CHECK-LABEL: define i8 @test_vqrshrunh_n_s16(i16 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 %a, i64 0
// CHECK: [[VQRSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRUNH_N_S16]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqrshrunh_n_s16(int16_t a) {
  return (int8_t)vqrshrunh_n_s16(a, 8);
}

// CHECK-LABEL: define i16 @test_vqrshruns_n_s32(i32 %a) #0 {
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 %a, i64 0
// CHECK: [[VQRSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRUNS_N_S32]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqrshruns_n_s32(int32_t a) {
  return (int16_t)vqrshruns_n_s32(a, 16);
}

// CHECK-LABEL: define i32 @test_vqrshrund_n_s64(i64 %a) #0 {
// CHECK: [[VQRSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQRSHRUND_N_S64]]
int32_t test_vqrshrund_n_s64(int64_t a) {
  return (int32_t)vqrshrund_n_s64(a, 32);
}
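
// Fixed-point conversions take the number of fractional bits as the
// second argument; the tests below use the boundary values of that
// immediate (1, and 32 or 64 for the 32- and 64-bit forms).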
// CHECK-LABEL: define float @test_vcvts_n_f32_s32(i32 %a) #0 {
// CHECK: [[VCVTS_N_F32_S32:%.*]] = call float @llvm.aarch64.neon.vcvtfxs2fp.f32.i32(i32 %a, i32 1)
// CHECK: ret float [[VCVTS_N_F32_S32]]
float32_t test_vcvts_n_f32_s32(int32_t a) {
  return vcvts_n_f32_s32(a, 1);
}

// CHECK-LABEL: define double @test_vcvtd_n_f64_s64(i64 %a) #0 {
// CHECK: [[VCVTD_N_F64_S64:%.*]] = call double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64 %a, i32 1)
// CHECK: ret double [[VCVTD_N_F64_S64]]
float64_t test_vcvtd_n_f64_s64(int64_t a) {
  return vcvtd_n_f64_s64(a, 1);
}

// CHECK-LABEL: define float @test_vcvts_n_f32_u32(i32 %a) #0 {
// CHECK: [[VCVTS_N_F32_U32:%.*]] = call float @llvm.aarch64.neon.vcvtfxu2fp.f32.i32(i32 %a, i32 32)
// CHECK: ret float [[VCVTS_N_F32_U32]]
float32_t test_vcvts_n_f32_u32(uint32_t a) {
  return vcvts_n_f32_u32(a, 32);
}

// CHECK-LABEL: define double @test_vcvtd_n_f64_u64(i64 %a) #0 {
// CHECK: [[VCVTD_N_F64_U64:%.*]] = call double @llvm.aarch64.neon.vcvtfxu2fp.f64.i64(i64 %a, i32 64)
// CHECK: ret double [[VCVTD_N_F64_U64]]
float64_t test_vcvtd_n_f64_u64(uint64_t a) {
  return vcvtd_n_f64_u64(a, 64);
}

// CHECK-LABEL: define i32 @test_vcvts_n_s32_f32(float %a) #0 {
// CHECK: [[VCVTS_N_S32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f32(float %a, i32 1)
// CHECK: ret i32 [[VCVTS_N_S32_F32]]
int32_t test_vcvts_n_s32_f32(float32_t a) {
  return (int32_t)vcvts_n_s32_f32(a, 1);
}

// CHECK-LABEL: define i64 @test_vcvtd_n_s64_f64(double %a) #0 {
// CHECK: [[VCVTD_N_S64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f64(double %a, i32 1)
// CHECK: ret i64 [[VCVTD_N_S64_F64]]
int64_t test_vcvtd_n_s64_f64(float64_t a) {
  return (int64_t)vcvtd_n_s64_f64(a, 1);
}

// CHECK-LABEL: define i32 @test_vcvts_n_u32_f32(float %a) #0 {
// CHECK: [[VCVTS_N_U32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f32(float %a, i32 32)
// CHECK: ret i32 [[VCVTS_N_U32_F32]]
uint32_t test_vcvts_n_u32_f32(float32_t a) {
  return (uint32_t)vcvts_n_u32_f32(a, 32);
}

// CHECK-LABEL: define i64 @test_vcvtd_n_u64_f64(double %a) #0 {
// CHECK: [[VCVTD_N_U64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxu.i64.f64(double %a, i32 64)
// CHECK: ret i64 [[VCVTD_N_U64_F64]]
uint64_t test_vcvtd_n_u64_f64(float64_t a) {
  return (uint64_t)vcvtd_n_u64_f64(a, 64);
}
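
// vreinterpret reinterprets the bits of one 64-bit vector type as
// another: it lowers to a single bitcast, or to no code at all when the
// two types already share an IR representation (e.g. s8, u8 and p8 are
// all <8 x i8>).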
// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s16(int16x4_t a) {
  return vreinterpret_s8_s16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s32(int32x2_t a) {
  return vreinterpret_s8_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s64(int64x1_t a) {
  return vreinterpret_s8_s64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u8(<8 x i8> %a) #0 {
// CHECK: ret <8 x i8> %a
int8x8_t test_vreinterpret_s8_u8(uint8x8_t a) {
  return vreinterpret_s8_u8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u16(uint16x4_t a) {
  return vreinterpret_s8_u16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u32(uint32x2_t a) {
  return vreinterpret_s8_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u64(uint64x1_t a) {
  return vreinterpret_s8_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f16(float16x4_t a) {
  return vreinterpret_s8_f16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f32(float32x2_t a) {
  return vreinterpret_s8_f32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f64(float64x1_t a) {
  return vreinterpret_s8_f64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_p8(<8 x i8> %a) #0 {
// CHECK: ret <8 x i8> %a
int8x8_t test_vreinterpret_s8_p8(poly8x8_t a) {
  return vreinterpret_s8_p8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_p16(poly16x4_t a) {
  return vreinterpret_s8_p16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_p64(poly64x1_t a) {
  return vreinterpret_s8_p64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s8(int8x8_t a) {
  return vreinterpret_s16_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s32(int32x2_t a) {
  return vreinterpret_s16_s32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s64(int64x1_t a) {
  return vreinterpret_s16_s64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u8(uint8x8_t a) {
  return vreinterpret_s16_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u16(<4 x i16> %a) #0 {
// CHECK: ret <4 x i16> %a
int16x4_t test_vreinterpret_s16_u16(uint16x4_t a) {
  return vreinterpret_s16_u16(a);
}
// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u32(uint32x2_t a) {
  return vreinterpret_s16_u32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) {
  return vreinterpret_s16_u64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f16(float16x4_t a) {
  return vreinterpret_s16_f16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f32(float32x2_t a) {
  return vreinterpret_s16_f32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f64(float64x1_t a) {
  return vreinterpret_s16_f64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) {
  return vreinterpret_s16_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p16(<4 x i16> %a) #0 {
// CHECK: ret <4 x i16> %a
int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) {
  return vreinterpret_s16_p16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_p64(poly64x1_t a) {
  return vreinterpret_s16_p64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s8(int8x8_t a) {
  return vreinterpret_s32_s8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s16(int16x4_t a) {
  return vreinterpret_s32_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s64(int64x1_t a) {
  return vreinterpret_s32_s64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) {
  return vreinterpret_s32_u8(a);
}
// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) {
  return vreinterpret_s32_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u32(<2 x i32> %a) #0 {
// CHECK: ret <2 x i32> %a
int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) {
  return vreinterpret_s32_u32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) {
  return vreinterpret_s32_u64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f16(float16x4_t a) {
  return vreinterpret_s32_f16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f32(float32x2_t a) {
  return vreinterpret_s32_f32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f64(float64x1_t a) {
  return vreinterpret_s32_f64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) {
  return vreinterpret_s32_p8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) {
  return vreinterpret_s32_p16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p64(poly64x1_t a) {
  return vreinterpret_s32_p64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s8(int8x8_t a) {
  return vreinterpret_s64_s8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s16(int16x4_t a) {
  return vreinterpret_s64_s16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s32(int32x2_t a) {
  return vreinterpret_s64_s32(a);
}
// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) {
  return vreinterpret_s64_u8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) {
  return vreinterpret_s64_u16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) {
  return vreinterpret_s64_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u64(<1 x i64> %a) #0 {
// CHECK: ret <1 x i64> %a
int64x1_t test_vreinterpret_s64_u64(uint64x1_t a) {
  return vreinterpret_s64_u64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f16(float16x4_t a) {
  return vreinterpret_s64_f16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f32(float32x2_t a) {
  return vreinterpret_s64_f32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f64(float64x1_t a) {
  return vreinterpret_s64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) {
  return vreinterpret_s64_p8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) {
  return vreinterpret_s64_p16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p64(<1 x i64> %a) #0 {
// CHECK: ret <1 x i64> %a
int64x1_t test_vreinterpret_s64_p64(poly64x1_t a) {
  return vreinterpret_s64_p64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s8(<8 x i8> %a) #0 {
// CHECK: ret <8 x i8> %a
uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) {
  return vreinterpret_u8_s8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) {
  return vreinterpret_u8_s16(a);
}
// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s32(int32x2_t a) {
  return vreinterpret_u8_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) {
  return vreinterpret_u8_s64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) {
  return vreinterpret_u8_u16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) {
  return vreinterpret_u8_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) {
  return vreinterpret_u8_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) {
  return vreinterpret_u8_f16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) {
  return vreinterpret_u8_f32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f64(float64x1_t a) {
  return vreinterpret_u8_f64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p8(<8 x i8> %a) #0 {
// CHECK: ret <8 x i8> %a
uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) {
  return vreinterpret_u8_p8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) {
  return vreinterpret_u8_p16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_p64(poly64x1_t a) {
  return vreinterpret_u8_p64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) {
  return vreinterpret_u16_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s16(<4 x i16> %a) #0 {
// CHECK: ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) {
  return vreinterpret_u16_s16(a);
}
// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) {
  return vreinterpret_u16_s32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) {
  return vreinterpret_u16_s64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) {
  return vreinterpret_u16_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) {
  return vreinterpret_u16_u32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) {
  return vreinterpret_u16_u64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) {
  return vreinterpret_u16_f16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) {
  return vreinterpret_u16_f32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f64(float64x1_t a) {
  return vreinterpret_u16_f64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) {
  return vreinterpret_u16_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p16(<4 x i16> %a) #0 {
// CHECK: ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) {
  return vreinterpret_u16_p16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_p64(poly64x1_t a) {
  return vreinterpret_u16_p64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) {
  return vreinterpret_u32_s8(a);
}
// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) {
  return vreinterpret_u32_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s32(<2 x i32> %a) #0 {
// CHECK: ret <2 x i32> %a
uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) {
  return vreinterpret_u32_s32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) {
  return vreinterpret_u32_s64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) {
  return vreinterpret_u32_u8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) {
  return vreinterpret_u32_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) {
  return vreinterpret_u32_u64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) {
  return vreinterpret_u32_f16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) {
  return vreinterpret_u32_f32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f64(float64x1_t a) {
  return vreinterpret_u32_f64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p8(poly8x8_t a) {
  return vreinterpret_u32_p8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) {
  return vreinterpret_u32_p16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p64(poly64x1_t a) {
  return vreinterpret_u32_p64(a);
}
// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) {
  return vreinterpret_u64_s8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) {
  return vreinterpret_u64_s16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) {
  return vreinterpret_u64_s32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s64(<1 x i64> %a) #0 {
// CHECK: ret <1 x i64> %a
uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) {
  return vreinterpret_u64_s64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) {
  return vreinterpret_u64_u8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) {
  return vreinterpret_u64_u16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) {
  return vreinterpret_u64_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) {
  return vreinterpret_u64_f16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) {
  return vreinterpret_u64_f32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f64(float64x1_t a) {
  return vreinterpret_u64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) {
  return vreinterpret_u64_p8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) {
  return vreinterpret_u64_p16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p64(<1 x i64> %a) #0 {
// CHECK: ret <1 x i64> %a
uint64x1_t test_vreinterpret_u64_p64(poly64x1_t a) {
  return vreinterpret_u64_p64(a);
}
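
// Reinterprets to floating-point element types are the same single
// bitcast; float16x4_t, float32x2_t and float64x1_t map to <4 x half>,
// <2 x float> and <1 x double> respectively.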
// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s8(int8x8_t a) {
  return vreinterpret_f16_s8(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s16(int16x4_t a) {
  return vreinterpret_f16_s16(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s32(int32x2_t a) {
  return vreinterpret_f16_s32(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s64(int64x1_t a) {
  return vreinterpret_f16_s64(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) {
  return vreinterpret_f16_u8(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) {
  return vreinterpret_f16_u16(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) {
  return vreinterpret_f16_u32(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) {
  return vreinterpret_f16_u64(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_f32(float32x2_t a) {
  return vreinterpret_f16_f32(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_f64(float64x1_t a) {
  return vreinterpret_f16_f64(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) {
  return vreinterpret_f16_p8(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) {
  return vreinterpret_f16_p16(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p64(poly64x1_t a) {
  return vreinterpret_f16_p64(a);
}
// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s8(int8x8_t a) {
  return vreinterpret_f32_s8(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s16(int16x4_t a) {
  return vreinterpret_f32_s16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s32(int32x2_t a) {
  return vreinterpret_f32_s32(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s64(int64x1_t a) {
  return vreinterpret_f32_s64(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) {
  return vreinterpret_f32_u8(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) {
  return vreinterpret_f32_u16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) {
  return vreinterpret_f32_u32(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) {
  return vreinterpret_f32_u64(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_f16(float16x4_t a) {
  return vreinterpret_f32_f16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_f64(float64x1_t a) {
  return vreinterpret_f32_f64(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) {
  return vreinterpret_f32_p8(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) {
  return vreinterpret_f32_p16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p64(poly64x1_t a) {
  return vreinterpret_f32_p64(a);
}
// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s8(int8x8_t a) {
  return vreinterpret_f64_s8(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s16(int16x4_t a) {
  return vreinterpret_f64_s16(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s32(int32x2_t a) {
  return vreinterpret_f64_s32(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s64(int64x1_t a) {
  return vreinterpret_f64_s64(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u8(uint8x8_t a) {
  return vreinterpret_f64_u8(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u16(uint16x4_t a) {
  return vreinterpret_f64_u16(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u32(uint32x2_t a) {
  return vreinterpret_f64_u32(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u64(uint64x1_t a) {
  return vreinterpret_f64_u64(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_f16(float16x4_t a) {
  return vreinterpret_f64_f16(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_f32(float32x2_t a) {
  return vreinterpret_f64_f32(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p8(poly8x8_t a) {
  return vreinterpret_f64_p8(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p16(poly16x4_t a) {
  return vreinterpret_f64_p16(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p64(poly64x1_t a) {
  return vreinterpret_f64_p64(a);
}
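
// Polynomial vectors reuse the integer IR types, so reinterprets from
// the same-layout integer types compile away entirely.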
float64x1_t test_vreinterpret_f64_f16(float16x4_t a) {
  return vreinterpret_f64_f16(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_f32(float32x2_t a) {
  return vreinterpret_f64_f32(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p8(poly8x8_t a) {
  return vreinterpret_f64_p8(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p16(poly16x4_t a) {
  return vreinterpret_f64_p16(a);
}

// CHECK-LABEL: define <1 x double> @test_vreinterpret_f64_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p64(poly64x1_t a) {
  return vreinterpret_f64_p64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s8(<8 x i8> %a) #0 {
// CHECK: ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) {
  return vreinterpret_p8_s8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s16(int16x4_t a) {
  return vreinterpret_p8_s16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) {
  return vreinterpret_p8_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) {
  return vreinterpret_p8_s64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u8(<8 x i8> %a) #0 {
// CHECK: ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) {
  return vreinterpret_p8_u8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) {
  return vreinterpret_p8_u16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) {
  return vreinterpret_p8_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) {
  return vreinterpret_p8_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) {
  return vreinterpret_p8_f16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) {
  return vreinterpret_p8_f32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f64(float64x1_t a) {
  return vreinterpret_p8_f64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) {
  return vreinterpret_p8_p16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p64(poly64x1_t a) {
  return vreinterpret_p8_p64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) {
  return vreinterpret_p16_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s16(<4 x i16> %a) #0 {
// CHECK: ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) {
  return vreinterpret_p16_s16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) {
  return vreinterpret_p16_s32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) {
  return vreinterpret_p16_s64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) {
  return vreinterpret_p16_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u16(<4 x i16> %a) #0 {
// CHECK: ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) {
  return vreinterpret_p16_u16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) {
  return vreinterpret_p16_u32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) {
  return vreinterpret_p16_u64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) {
  return vreinterpret_p16_f16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) {
  return vreinterpret_p16_f32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f64(float64x1_t a) {
  return vreinterpret_p16_f64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) {
  return vreinterpret_p16_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_p64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p64(poly64x1_t a) {
  return vreinterpret_p16_p64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s8(int8x8_t a) {
  return vreinterpret_p64_s8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s16(int16x4_t a) {
  return vreinterpret_p64_s16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s32(int32x2_t a) {
  return vreinterpret_p64_s32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_s64(<1 x i64> %a) #0 {
// CHECK: ret <1 x i64> %a
poly64x1_t test_vreinterpret_p64_s64(int64x1_t a) {
  return vreinterpret_p64_s64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u8(uint8x8_t a) {
  return vreinterpret_p64_u8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u16(uint16x4_t a) {
  return vreinterpret_p64_u16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u32(uint32x2_t a) {
  return vreinterpret_p64_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_u64(<1 x i64> %a) #0 {
// CHECK: ret <1 x i64> %a
poly64x1_t test_vreinterpret_p64_u64(uint64x1_t a) {
  return vreinterpret_p64_u64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f16(float16x4_t a) {
  return vreinterpret_p64_f16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f32(float32x2_t a) {
  return vreinterpret_p64_f32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f64(float64x1_t a) {
  return vreinterpret_p64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_p8(poly8x8_t a) {
  return vreinterpret_p64_p8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_p64_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_p16(poly16x4_t a) {
  return vreinterpret_p64_p16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) {
  return vreinterpretq_s8_s16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) {
  return vreinterpretq_s8_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) {
  return vreinterpretq_s8_s64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) {
  return vreinterpretq_s8_u8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) {
  return vreinterpretq_s8_u16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) {
  return vreinterpretq_s8_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) {
  return vreinterpretq_s8_u64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) {
  return vreinterpretq_s8_f16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) {
  return vreinterpretq_s8_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f64(float64x2_t a) {
  return vreinterpretq_s8_f64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) {
  return vreinterpretq_s8_p8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) {
  return vreinterpretq_s8_p16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_p64(poly64x2_t a) {
  return vreinterpretq_s8_p64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) {
  return vreinterpretq_s16_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) {
  return vreinterpretq_s16_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) {
  return vreinterpretq_s16_s64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) {
  return vreinterpretq_s16_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) {
  return vreinterpretq_s16_u16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) {
  return vreinterpretq_s16_u32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) {
  return vreinterpretq_s16_u64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) {
  return vreinterpretq_s16_f16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) {
  return vreinterpretq_s16_f32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f64(float64x2_t a) {
  return vreinterpretq_s16_f64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) {
  return vreinterpretq_s16_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) {
  return vreinterpretq_s16_p16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_p64(poly64x2_t a) {
  return vreinterpretq_s16_p64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) {
  return vreinterpretq_s32_s8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) {
  return vreinterpretq_s32_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) {
  return vreinterpretq_s32_s64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) {
  return vreinterpretq_s32_u8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) {
  return vreinterpretq_s32_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u32(<4 x i32> %a) #0 {
// CHECK: ret <4 x i32> %a
int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) {
  return vreinterpretq_s32_u32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) {
  return vreinterpretq_s32_u64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) {
  return vreinterpretq_s32_f16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) {
  return vreinterpretq_s32_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f64(float64x2_t a) {
  return vreinterpretq_s32_f64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) {
  return vreinterpretq_s32_p8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) {
  return vreinterpretq_s32_p16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p64(poly64x2_t a) {
  return vreinterpretq_s32_p64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) {
  return vreinterpretq_s64_s8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) {
  return vreinterpretq_s64_s16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) {
  return vreinterpretq_s64_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) {
  return vreinterpretq_s64_u8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) {
  return vreinterpretq_s64_u16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) {
  return vreinterpretq_s64_u32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u64(<2 x i64> %a) #0 {
// CHECK: ret <2 x i64> %a
int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) {
  return vreinterpretq_s64_u64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) {
  return vreinterpretq_s64_f16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) {
  return vreinterpretq_s64_f32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f64(float64x2_t a) {
  return vreinterpretq_s64_f64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) {
  return vreinterpretq_s64_p8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) {
  return vreinterpretq_s64_p16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p64(<2 x i64> %a) #0 {
// CHECK: ret <2 x i64> %a
int64x2_t test_vreinterpretq_s64_p64(poly64x2_t a) {
  return vreinterpretq_s64_p64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) {
  return vreinterpretq_u8_s8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) {
  return vreinterpretq_u8_s16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) {
  return vreinterpretq_u8_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) {
  return vreinterpretq_u8_s64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) {
  return vreinterpretq_u8_u16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) {
  return vreinterpretq_u8_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) {
  return vreinterpretq_u8_u64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) {
  return vreinterpretq_u8_f16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) {
  return vreinterpretq_u8_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f64(float64x2_t a) {
  return vreinterpretq_u8_f64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) {
  return vreinterpretq_u8_p8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) {
  return vreinterpretq_u8_p16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_p64(poly64x2_t a) {
  return vreinterpretq_u8_p64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) {
  return vreinterpretq_u16_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) {
  return vreinterpretq_u16_s16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) {
  return vreinterpretq_u16_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) {
  return vreinterpretq_u16_s64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) {
  return vreinterpretq_u16_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) {
  return vreinterpretq_u16_u32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) {
  return vreinterpretq_u16_u64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) {
  return vreinterpretq_u16_f16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) {
  return vreinterpretq_u16_f32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f64(float64x2_t a) {
  return vreinterpretq_u16_f64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) {
  return vreinterpretq_u16_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) {
  return vreinterpretq_u16_p16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_p64(poly64x2_t a) {
  return vreinterpretq_u16_p64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) {
  return vreinterpretq_u32_s8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) {
  return vreinterpretq_u32_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s32(<4 x i32> %a) #0 {
// CHECK: ret <4 x i32> %a
uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) {
  return vreinterpretq_u32_s32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) {
  return vreinterpretq_u32_s64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) {
  return vreinterpretq_u32_u8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) {
  return vreinterpretq_u32_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) {
  return vreinterpretq_u32_u64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) {
  return vreinterpretq_u32_f16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) {
  return vreinterpretq_u32_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f64(float64x2_t a) {
  return vreinterpretq_u32_f64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) {
  return vreinterpretq_u32_p8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) {
  return vreinterpretq_u32_p16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p64(poly64x2_t a) {
  return vreinterpretq_u32_p64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) {
  return vreinterpretq_u64_s8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) {
  return vreinterpretq_u64_s16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) {
  return vreinterpretq_u64_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s64(<2 x i64> %a) #0 {
// CHECK: ret <2 x i64> %a
uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) {
  return vreinterpretq_u64_s64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) {
  return vreinterpretq_u64_u8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) {
  return vreinterpretq_u64_u16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) {
  return vreinterpretq_u64_u32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) {
  return vreinterpretq_u64_f16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) {
  return vreinterpretq_u64_f32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f64(float64x2_t a) {
  return vreinterpretq_u64_f64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) {
  return vreinterpretq_u64_p8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) {
  return vreinterpretq_u64_p16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p64(<2 x i64> %a) #0 {
// CHECK: ret <2 x i64> %a
uint64x2_t test_vreinterpretq_u64_p64(poly64x2_t a) {
  return vreinterpretq_u64_p64(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) {
  return vreinterpretq_f16_s8(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) {
  return vreinterpretq_f16_s16(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) {
  return vreinterpretq_f16_s32(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) {
  return vreinterpretq_f16_s64(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) {
  return vreinterpretq_f16_u8(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) {
  return vreinterpretq_f16_u16(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) {
  return vreinterpretq_f16_u32(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) {
  return vreinterpretq_f16_u64(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) {
  return vreinterpretq_f16_f32(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_f64(float64x2_t a) {
  return vreinterpretq_f16_f64(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) {
  return vreinterpretq_f16_p8(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) {
  return vreinterpretq_f16_p16(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p64(poly64x2_t a) {
  return vreinterpretq_f16_p64(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) {
  return vreinterpretq_f32_s8(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) {
  return vreinterpretq_f32_s16(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) {
  return vreinterpretq_f32_s32(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) {
  return vreinterpretq_f32_s64(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) {
  return vreinterpretq_f32_u8(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) {
  return vreinterpretq_f32_u16(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) {
  return vreinterpretq_f32_u32(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) {
  return vreinterpretq_f32_u64(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) {
  return vreinterpretq_f32_f16(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_f64(float64x2_t a) {
  return vreinterpretq_f32_f64(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) {
  return vreinterpretq_f32_p8(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) {
  return vreinterpretq_f32_p16(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p64(poly64x2_t a) {
  return vreinterpretq_f32_p64(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s8(int8x16_t a) {
  return vreinterpretq_f64_s8(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s16(int16x8_t a) {
  return vreinterpretq_f64_s16(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s32(int32x4_t a) {
  return vreinterpretq_f64_s32(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s64(int64x2_t a) {
  return vreinterpretq_f64_s64(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u8(uint8x16_t a) {
  return vreinterpretq_f64_u8(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u16(uint16x8_t a) {
  return vreinterpretq_f64_u16(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u32(uint32x4_t a) {
  return vreinterpretq_f64_u32(a);
}

// CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_u64(<2 x i64> %a) #0 {
21482 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double> 21483 // CHECK: ret <2 x double> [[TMP0]] 21484 float64x2_t test_vreinterpretq_f64_u64(uint64x2_t a) { 21485 return vreinterpretq_f64_u64(a); 21486 } 21487 21488 // CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_f16(<8 x half> %a) #0 { 21489 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x double> 21490 // CHECK: ret <2 x double> [[TMP0]] 21491 float64x2_t test_vreinterpretq_f64_f16(float16x8_t a) { 21492 return vreinterpretq_f64_f16(a); 21493 } 21494 21495 // CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_f32(<4 x float> %a) #0 { 21496 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x double> 21497 // CHECK: ret <2 x double> [[TMP0]] 21498 float64x2_t test_vreinterpretq_f64_f32(float32x4_t a) { 21499 return vreinterpretq_f64_f32(a); 21500 } 21501 21502 // CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_p8(<16 x i8> %a) #0 { 21503 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double> 21504 // CHECK: ret <2 x double> [[TMP0]] 21505 float64x2_t test_vreinterpretq_f64_p8(poly8x16_t a) { 21506 return vreinterpretq_f64_p8(a); 21507 } 21508 21509 // CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_p16(<8 x i16> %a) #0 { 21510 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double> 21511 // CHECK: ret <2 x double> [[TMP0]] 21512 float64x2_t test_vreinterpretq_f64_p16(poly16x8_t a) { 21513 return vreinterpretq_f64_p16(a); 21514 } 21515 21516 // CHECK-LABEL: define <2 x double> @test_vreinterpretq_f64_p64(<2 x i64> %a) #0 { 21517 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double> 21518 // CHECK: ret <2 x double> [[TMP0]] 21519 float64x2_t test_vreinterpretq_f64_p64(poly64x2_t a) { 21520 return vreinterpretq_f64_p64(a); 21521 } 21522 21523 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s8(<16 x i8> %a) #0 { 21524 // CHECK: ret <16 x i8> %a 21525 poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) { 21526 return vreinterpretq_p8_s8(a); 21527 } 21528 21529 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s16(<8 x i16> %a) #0 { 21530 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 21531 // CHECK: ret <16 x i8> [[TMP0]] 21532 poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) { 21533 return vreinterpretq_p8_s16(a); 21534 } 21535 21536 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s32(<4 x i32> %a) #0 { 21537 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 21538 // CHECK: ret <16 x i8> [[TMP0]] 21539 poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) { 21540 return vreinterpretq_p8_s32(a); 21541 } 21542 21543 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s64(<2 x i64> %a) #0 { 21544 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 21545 // CHECK: ret <16 x i8> [[TMP0]] 21546 poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) { 21547 return vreinterpretq_p8_s64(a); 21548 } 21549 21550 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u8(<16 x i8> %a) #0 { 21551 // CHECK: ret <16 x i8> %a 21552 poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) { 21553 return vreinterpretq_p8_u8(a); 21554 } 21555 21556 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u16(<8 x i16> %a) #0 { 21557 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 21558 // CHECK: ret <16 x i8> [[TMP0]] 21559 poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) { 21560 return vreinterpretq_p8_u16(a); 21561 } 21562 21563 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u32(<4 x i32> %a) #0 { 21564 // CHECK: 
[[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 21565 // CHECK: ret <16 x i8> [[TMP0]] 21566 poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) { 21567 return vreinterpretq_p8_u32(a); 21568 } 21569 21570 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u64(<2 x i64> %a) #0 { 21571 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 21572 // CHECK: ret <16 x i8> [[TMP0]] 21573 poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) { 21574 return vreinterpretq_p8_u64(a); 21575 } 21576 21577 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f16(<8 x half> %a) #0 { 21578 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8> 21579 // CHECK: ret <16 x i8> [[TMP0]] 21580 poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) { 21581 return vreinterpretq_p8_f16(a); 21582 } 21583 21584 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f32(<4 x float> %a) #0 { 21585 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 21586 // CHECK: ret <16 x i8> [[TMP0]] 21587 poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) { 21588 return vreinterpretq_p8_f32(a); 21589 } 21590 21591 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f64(<2 x double> %a) #0 { 21592 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 21593 // CHECK: ret <16 x i8> [[TMP0]] 21594 poly8x16_t test_vreinterpretq_p8_f64(float64x2_t a) { 21595 return vreinterpretq_p8_f64(a); 21596 } 21597 21598 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_p16(<8 x i16> %a) #0 { 21599 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 21600 // CHECK: ret <16 x i8> [[TMP0]] 21601 poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) { 21602 return vreinterpretq_p8_p16(a); 21603 } 21604 21605 // CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_p64(<2 x i64> %a) #0 { 21606 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 21607 // CHECK: ret <16 x i8> [[TMP0]] 21608 poly8x16_t test_vreinterpretq_p8_p64(poly64x2_t a) { 21609 return vreinterpretq_p8_p64(a); 21610 } 21611 21612 // CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s8(<16 x i8> %a) #0 { 21613 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> 21614 // CHECK: ret <8 x i16> [[TMP0]] 21615 poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) { 21616 return vreinterpretq_p16_s8(a); 21617 } 21618 21619 // CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s16(<8 x i16> %a) #0 { 21620 // CHECK: ret <8 x i16> %a 21621 poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) { 21622 return vreinterpretq_p16_s16(a); 21623 } 21624 21625 // CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s32(<4 x i32> %a) #0 { 21626 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16> 21627 // CHECK: ret <8 x i16> [[TMP0]] 21628 poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) { 21629 return vreinterpretq_p16_s32(a); 21630 } 21631 21632 // CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s64(<2 x i64> %a) #0 { 21633 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16> 21634 // CHECK: ret <8 x i16> [[TMP0]] 21635 poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) { 21636 return vreinterpretq_p16_s64(a); 21637 } 21638 21639 // CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u8(<16 x i8> %a) #0 { 21640 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> 21641 // CHECK: ret <8 x i16> [[TMP0]] 21642 poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) { 21643 return vreinterpretq_p16_u8(a); 21644 } 21645 21646 // CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u16(<8 x i16> %a) #0 { 21647 // CHECK: ret 

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s8(int8x16_t a) {
  return vreinterpretq_p64_s8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s16(int16x8_t a) {
  return vreinterpretq_p64_s16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s32(int32x4_t a) {
  return vreinterpretq_p64_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_s64(<2 x i64> %a) #0 {
// CHECK: ret <2 x i64> %a
poly64x2_t test_vreinterpretq_p64_s64(int64x2_t a) {
  return vreinterpretq_p64_s64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u8(uint8x16_t a) {
  return vreinterpretq_p64_u8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u16(uint16x8_t a) {
  return vreinterpretq_p64_u16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u32(uint32x4_t a) {
  return vreinterpretq_p64_u32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_u64(<2 x i64> %a) #0 {
// CHECK: ret <2 x i64> %a
poly64x2_t test_vreinterpretq_p64_u64(uint64x2_t a) {
  return vreinterpretq_p64_u64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f16(float16x8_t a) {
  return vreinterpretq_p64_f16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f32(float32x4_t a) {
  return vreinterpretq_p64_f32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f64(float64x2_t a) {
  return vreinterpretq_p64_f64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_p8(poly8x16_t a) {
  return vreinterpretq_p64_p8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_p64_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_p16(poly16x8_t a) {
  return vreinterpretq_p64_p16(a);
}
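
// Scalar floating-point absolute difference: unlike the vector reinterpret
// tests above, vabds_f32/vabdd_f64 are expected to lower to the scalar
// @llvm.aarch64.sisd.fabd.* intrinsics rather than to vector IR.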

// CHECK-LABEL: define float @test_vabds_f32(float %a, float %b) #0 {
// CHECK: [[VABDS_F32_I:%.*]] = call float @llvm.aarch64.sisd.fabd.f32(float %a, float %b) #4
// CHECK: ret float [[VABDS_F32_I]]
float32_t test_vabds_f32(float32_t a, float32_t b) {
  return vabds_f32(a, b);
}

// CHECK-LABEL: define double @test_vabdd_f64(double %a, double %b) #0 {
// CHECK: [[VABDD_F64_I:%.*]] = call double @llvm.aarch64.sisd.fabd.f64(double %a, double %b) #4
// CHECK: ret double [[VABDD_F64_I]]
float64_t test_vabdd_f64(float64_t a, float64_t b) {
  return vabdd_f64(a, b);
}
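
// Saturating accumulate: vuqadd_* adds an unsigned vector into a signed
// accumulator (@llvm.aarch64.neon.suqadd.*) and vsqadd_* adds a signed vector
// into an unsigned accumulator (@llvm.aarch64.neon.usqadd.*). Lane widths
// that go through the generic builtin path pick up extra <8 x i8>/<16 x i8>
// bitcasts around the call.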

// CHECK-LABEL: define <1 x i64> @test_vuqadd_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VUQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VUQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VUQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> [[VUQADD_I]], <1 x i64> [[VUQADD1_I]]) #4
// CHECK: ret <1 x i64> [[VUQADD2_I]]
int64x1_t test_vuqadd_s64(int64x1_t a, uint64x1_t b) {
  return vuqadd_s64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vsqadd_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> [[VSQADD_I]], <1 x i64> [[VSQADD1_I]]) #4
// CHECK: ret <1 x i64> [[VSQADD2_I]]
uint64x1_t test_vsqadd_u64(uint64x1_t a, int64x1_t b) {
  return vsqadd_u64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vsqadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSQADD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VSQADD_I]]
uint8x8_t test_vsqadd_u8(uint8x8_t a, int8x8_t b) {
  return vsqadd_u8(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vsqaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSQADD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VSQADD_I]]
uint8x16_t test_vsqaddq_u8(uint8x16_t a, int8x16_t b) {
  return vsqaddq_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vsqadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[VSQADD_I]], <4 x i16> [[VSQADD1_I]]) #4
// CHECK: ret <4 x i16> [[VSQADD2_I]]
uint16x4_t test_vsqadd_u16(uint16x4_t a, int16x4_t b) {
  return vsqadd_u16(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vsqaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> [[VSQADD_I]], <8 x i16> [[VSQADD1_I]]) #4
// CHECK: ret <8 x i16> [[VSQADD2_I]]
uint16x8_t test_vsqaddq_u16(uint16x8_t a, int16x8_t b) {
  return vsqaddq_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vsqadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> [[VSQADD_I]], <2 x i32> [[VSQADD1_I]]) #4
// CHECK: ret <2 x i32> [[VSQADD2_I]]
uint32x2_t test_vsqadd_u32(uint32x2_t a, int32x2_t b) {
  return vsqadd_u32(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vsqaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> [[VSQADD_I]], <4 x i32> [[VSQADD1_I]]) #4
// CHECK: ret <4 x i32> [[VSQADD2_I]]
uint32x4_t test_vsqaddq_u32(uint32x4_t a, int32x4_t b) {
  return vsqaddq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vsqaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> [[VSQADD_I]], <2 x i64> [[VSQADD1_I]]) #4
// CHECK: ret <2 x i64> [[VSQADD2_I]]
uint64x2_t test_vsqaddq_u64(uint64x2_t a, int64x2_t b) {
  return vsqaddq_u64(a, b);
}
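
// Unary 64-bit operations: vabs_s64 and the saturating vqabs_s64/vqneg_s64
// use target intrinsics (abs/sqabs/sqneg), while vneg_s64 is expected to be
// a plain IR subtraction from zero.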

// CHECK-LABEL: define <1 x i64> @test_vabs_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VABS1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64> [[VABS_I]]) #4
// CHECK: ret <1 x i64> [[VABS1_I]]
int64x1_t test_vabs_s64(int64x1_t a) {
  return vabs_s64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vqabs_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQABS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQABS_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqabs.v1i64(<1 x i64> [[VQABS_V_I]]) #4
// CHECK: [[VQABS_V2_I:%.*]] = bitcast <1 x i64> [[VQABS_V1_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQABS_V2_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP1]]
int64x1_t test_vqabs_s64(int64x1_t a) {
  return vqabs_s64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vqneg_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQNEG_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQNEG_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqneg.v1i64(<1 x i64> [[VQNEG_V_I]]) #4
// CHECK: [[VQNEG_V2_I:%.*]] = bitcast <1 x i64> [[VQNEG_V1_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQNEG_V2_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP1]]
int64x1_t test_vqneg_s64(int64x1_t a) {
  return vqneg_s64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vneg_s64(<1 x i64> %a) #0 {
// CHECK: [[SUB_I:%.*]] = sub <1 x i64> zeroinitializer, %a
// CHECK: ret <1 x i64> [[SUB_I]]
int64x1_t test_vneg_s64(int64x1_t a) {
  return vneg_s64(a);
}
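
// Floating-point across-vector reductions: each of the
// v{add,max,min,maxnm,minnm}v intrinsics below collapses to a single scalar
// @llvm.aarch64.neon.f*v call.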

// CHECK-LABEL: define float @test_vaddv_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VADDV_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK: ret float [[VADDV_F32_I]]
float32_t test_vaddv_f32(float32x2_t a) {
  return vaddv_f32(a);
}

// CHECK-LABEL: define float @test_vaddvq_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VADDVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> [[TMP1]]) #4
// CHECK: ret float [[VADDVQ_F32_I]]
float32_t test_vaddvq_f32(float32x4_t a) {
  return vaddvq_f32(a);
}

// CHECK-LABEL: define double @test_vaddvq_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VADDVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK: ret double [[VADDVQ_F64_I]]
float64_t test_vaddvq_f64(float64x2_t a) {
  return vaddvq_f64(a);
}

// CHECK-LABEL: define float @test_vmaxv_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VMAXV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK: ret float [[VMAXV_F32_I]]
float32_t test_vmaxv_f32(float32x2_t a) {
  return vmaxv_f32(a);
}

// CHECK-LABEL: define double @test_vmaxvq_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VMAXVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK: ret double [[VMAXVQ_F64_I]]
float64_t test_vmaxvq_f64(float64x2_t a) {
  return vmaxvq_f64(a);
}

// CHECK-LABEL: define float @test_vminv_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VMINV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK: ret float [[VMINV_F32_I]]
float32_t test_vminv_f32(float32x2_t a) {
  return vminv_f32(a);
}

// CHECK-LABEL: define double @test_vminvq_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VMINVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK: ret double [[VMINVQ_F64_I]]
float64_t test_vminvq_f64(float64x2_t a) {
  return vminvq_f64(a);
}

// CHECK-LABEL: define double @test_vmaxnmvq_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VMAXNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK: ret double [[VMAXNMVQ_F64_I]]
float64_t test_vmaxnmvq_f64(float64x2_t a) {
  return vmaxnmvq_f64(a);
}

// CHECK-LABEL: define float @test_vmaxnmv_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VMAXNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK: ret float [[VMAXNMV_F32_I]]
float32_t test_vmaxnmv_f32(float32x2_t a) {
  return vmaxnmv_f32(a);
}

// CHECK-LABEL: define double @test_vminnmvq_f64(<2 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VMINNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> [[TMP1]]) #4
// CHECK: ret double [[VMINNMVQ_F64_I]]
float64_t test_vminnmvq_f64(float64x2_t a) {
  return vminnmvq_f64(a);
}

// CHECK-LABEL: define float @test_vminnmv_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VMINNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> [[TMP1]]) #4
// CHECK: ret float [[VMINNMV_F32_I]]
float32_t test_vminnmv_f32(float32x2_t a) {
  return vminnmv_f32(a);
}
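
// Pairwise addition on <2 x i64> uses @llvm.aarch64.neon.addp.v2i64, whereas
// the scalar vpaddd_u64 and vaddvq_{s,u}64 reductions use the uaddv/saddv
// across-vector intrinsics.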

// CHECK-LABEL: define <2 x i64> @test_vpaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[VPADDQ_V_I]], <2 x i64> [[VPADDQ_V1_I]]) #4
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vpaddq_s64(int64x2_t a, int64x2_t b) {
  return vpaddq_s64(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vpaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VPADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VPADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[VPADDQ_V_I]], <2 x i64> [[VPADDQ_V1_I]]) #4
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VPADDQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vpaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vpaddq_u64(a, b);
}

// CHECK-LABEL: define i64 @test_vpaddd_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VPADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[TMP1]]) #4
// CHECK: ret i64 [[VPADDD_U64_I]]
uint64_t test_vpaddd_u64(uint64x2_t a) {
  return vpaddd_u64(a);
}

// CHECK-LABEL: define i64 @test_vaddvq_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VADDVQ_S64_I:%.*]] = call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> [[TMP1]]) #4
// CHECK: ret i64 [[VADDVQ_S64_I]]
int64_t test_vaddvq_s64(int64x2_t a) {
  return vaddvq_s64(a);
}

// CHECK-LABEL: define i64 @test_vaddvq_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VADDVQ_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> [[TMP1]]) #4
// CHECK: ret i64 [[VADDVQ_U64_I]]
uint64_t test_vaddvq_u64(uint64x2_t a) {
  return vaddvq_u64(a);
}
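
// float64x1_t arithmetic needs no target intrinsics: the tests below expect
// plain IR fadd/fmul/fdiv on <1 x double>, with vmla/vmls decomposed into an
// fmul feeding an fadd/fsub.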

// CHECK-LABEL: define <1 x double> @test_vadd_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[ADD_I:%.*]] = fadd <1 x double> %a, %b
// CHECK: ret <1 x double> [[ADD_I]]
float64x1_t test_vadd_f64(float64x1_t a, float64x1_t b) {
  return vadd_f64(a, b);
}

// CHECK-LABEL: define <1 x double> @test_vmul_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <1 x double> %a, %b
// CHECK: ret <1 x double> [[MUL_I]]
float64x1_t test_vmul_f64(float64x1_t a, float64x1_t b) {
  return vmul_f64(a, b);
}

// CHECK-LABEL: define <1 x double> @test_vdiv_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[DIV_I:%.*]] = fdiv <1 x double> %a, %b
// CHECK: ret <1 x double> [[DIV_I]]
float64x1_t test_vdiv_f64(float64x1_t a, float64x1_t b) {
  return vdiv_f64(a, b);
}

// CHECK-LABEL: define <1 x double> @test_vmla_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <1 x double> %b, %c
// CHECK: [[ADD_I:%.*]] = fadd <1 x double> %a, [[MUL_I]]
// CHECK: ret <1 x double> [[ADD_I]]
float64x1_t test_vmla_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vmla_f64(a, b, c);
}

// CHECK-LABEL: define <1 x double> @test_vmls_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <1 x double> %b, %c
// CHECK: [[SUB_I:%.*]] = fsub <1 x double> %a, [[MUL_I]]
// CHECK: ret <1 x double> [[SUB_I]]
float64x1_t test_vmls_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vmls_f64(a, b, c);
}
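
// vfma_f64 maps onto the generic @llvm.fma.v1f64; vfms_f64 is the same call
// with the first multiplicand negated via an fsub from -0.0.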

// CHECK-LABEL: define <1 x double> @test_vfma_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
// CHECK: [[TMP6:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[TMP4]], <1 x double> [[TMP5]], <1 x double> [[TMP3]]) #4
// CHECK: ret <1 x double> [[TMP6]]
float64x1_t test_vfma_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vfma_f64(a, b, c);
}

// CHECK-LABEL: define <1 x double> @test_vfms_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) #0 {
// CHECK: [[SUB_I:%.*]] = fsub <1 x double> <double -0.000000e+00>, %b
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
// CHECK: [[TMP6:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[TMP4]], <1 x double> [[TMP5]], <1 x double> [[TMP3]]) #4
// CHECK: ret <1 x double> [[TMP6]]
float64x1_t test_vfms_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vfms_f64(a, b, c);
}

// CHECK-LABEL: define <1 x double> @test_vsub_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[SUB_I:%.*]] = fsub <1 x double> %a, %b
// CHECK: ret <1 x double> [[SUB_I]]
float64x1_t test_vsub_f64(float64x1_t a, float64x1_t b) {
  return vsub_f64(a, b);
}
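
// Binary <1 x double> operations that do need target support: fabd, fmax,
// fmin and the IEEE maxnm/minnm variants each lower to the corresponding
// @llvm.aarch64.neon.* call; the unary vabs_f64/vneg_f64 that follow use the
// generic @llvm.fabs and a plain fsub instead.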

// CHECK-LABEL: define <1 x double> @test_vabd_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK: [[VABD2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double> [[VABD_I]], <1 x double> [[VABD1_I]]) #4
// CHECK: ret <1 x double> [[VABD2_I]]
float64x1_t test_vabd_f64(float64x1_t a, float64x1_t b) {
  return vabd_f64(a, b);
}

// CHECK-LABEL: define <1 x double> @test_vmax_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VMAX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VMAX1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK: [[VMAX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmax.v1f64(<1 x double> [[VMAX_I]], <1 x double> [[VMAX1_I]]) #4
// CHECK: ret <1 x double> [[VMAX2_I]]
float64x1_t test_vmax_f64(float64x1_t a, float64x1_t b) {
  return vmax_f64(a, b);
}

// CHECK-LABEL: define <1 x double> @test_vmin_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK: [[VMIN2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmin.v1f64(<1 x double> [[VMIN_I]], <1 x double> [[VMIN1_I]]) #4
// CHECK: ret <1 x double> [[VMIN2_I]]
float64x1_t test_vmin_f64(float64x1_t a, float64x1_t b) {
  return vmin_f64(a, b);
}

// CHECK-LABEL: define <1 x double> @test_vmaxnm_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VMAXNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VMAXNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK: [[VMAXNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmaxnm.v1f64(<1 x double> [[VMAXNM_I]], <1 x double> [[VMAXNM1_I]]) #4
// CHECK: ret <1 x double> [[VMAXNM2_I]]
float64x1_t test_vmaxnm_f64(float64x1_t a, float64x1_t b) {
  return vmaxnm_f64(a, b);
}

// CHECK-LABEL: define <1 x double> @test_vminnm_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VMINNM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VMINNM1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK: [[VMINNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fminnm.v1f64(<1 x double> [[VMINNM_I]], <1 x double> [[VMINNM1_I]]) #4
// CHECK: ret <1 x double> [[VMINNM2_I]]
float64x1_t test_vminnm_f64(float64x1_t a, float64x1_t b) {
  return vminnm_f64(a, b);
}

// CHECK-LABEL: define <1 x double> @test_vabs_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VABS1_I:%.*]] = call <1 x double> @llvm.fabs.v1f64(<1 x double> [[VABS_I]]) #4
// CHECK: ret <1 x double> [[VABS1_I]]
float64x1_t test_vabs_f64(float64x1_t a) {
  return vabs_f64(a);
}

// CHECK-LABEL: define <1 x double> @test_vneg_f64(<1 x double> %a) #0 {
// CHECK: [[SUB_I:%.*]] = fsub <1 x double> <double -0.000000e+00>, %a
// CHECK: ret <1 x double> [[SUB_I]]
float64x1_t test_vneg_f64(float64x1_t a) {
  return vneg_f64(a);
}
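
// Conversions: the default vcvt_{s,u}64_f64 are plain fptosi/fptoui, while
// the explicitly rounded vcvt{n,p,m,a} variants keep their
// @llvm.aarch64.neon.fcvt* intrinsics; vcvt_f64_{s,u}64 are sitofp/uitofp.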

// CHECK-LABEL: define <1 x i64> @test_vcvt_s64_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[TMP2:%.*]] = fptosi <1 x double> [[TMP1]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vcvt_s64_f64(float64x1_t a) {
  return vcvt_s64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vcvt_u64_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[TMP2:%.*]] = fptoui <1 x double> [[TMP1]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vcvt_u64_f64(float64x1_t a) {
  return vcvt_u64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vcvtn_s64_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVTN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> [[VCVTN_I]]) #4
// CHECK: ret <1 x i64> [[VCVTN1_I]]
int64x1_t test_vcvtn_s64_f64(float64x1_t a) {
  return vcvtn_s64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vcvtn_u64_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVTN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> [[VCVTN_I]]) #4
// CHECK: ret <1 x i64> [[VCVTN1_I]]
uint64x1_t test_vcvtn_u64_f64(float64x1_t a) {
  return vcvtn_u64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vcvtp_s64_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVTP_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> [[VCVTP_I]]) #4
// CHECK: ret <1 x i64> [[VCVTP1_I]]
int64x1_t test_vcvtp_s64_f64(float64x1_t a) {
  return vcvtp_s64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vcvtp_u64_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVTP_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> [[VCVTP_I]]) #4
// CHECK: ret <1 x i64> [[VCVTP1_I]]
uint64x1_t test_vcvtp_u64_f64(float64x1_t a) {
  return vcvtp_u64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vcvtm_s64_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVTM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> [[VCVTM_I]]) #4
// CHECK: ret <1 x i64> [[VCVTM1_I]]
int64x1_t test_vcvtm_s64_f64(float64x1_t a) {
  return vcvtm_s64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vcvtm_u64_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVTM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> [[VCVTM_I]]) #4
// CHECK: ret <1 x i64> [[VCVTM1_I]]
uint64x1_t test_vcvtm_u64_f64(float64x1_t a) {
  return vcvtm_u64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vcvta_s64_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVTA_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> [[VCVTA_I]]) #4
// CHECK: ret <1 x i64> [[VCVTA1_I]]
int64x1_t test_vcvta_s64_f64(float64x1_t a) {
  return vcvta_s64_f64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vcvta_u64_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVTA_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> [[VCVTA_I]]) #4
// CHECK: ret <1 x i64> [[VCVTA1_I]]
uint64x1_t test_vcvta_u64_f64(float64x1_t a) {
  return vcvta_u64_f64(a);
}

// CHECK-LABEL: define <1 x double> @test_vcvt_f64_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VCVT_I:%.*]] = sitofp <1 x i64> [[TMP1]] to <1 x double>
// CHECK: ret <1 x double> [[VCVT_I]]
float64x1_t test_vcvt_f64_s64(int64x1_t a) {
  return vcvt_f64_s64(a);
}

// CHECK-LABEL: define <1 x double> @test_vcvt_f64_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VCVT_I:%.*]] = uitofp <1 x i64> [[TMP1]] to <1 x double>
// CHECK: ret <1 x double> [[VCVT_I]]
float64x1_t test_vcvt_f64_u64(uint64x1_t a) {
  return vcvt_f64_u64(a);
}
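
// Fixed-point conversions carry the number of fraction bits as an immediate;
// these tests use 64 (the maximum for a 64-bit element) and expect it to
// appear as the i32 constant operand of the vcvtfp2fx*/vcvtfx*2fp intrinsics.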

// CHECK-LABEL: define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
// CHECK: ret <1 x i64> [[VCVT_N1]]
int64x1_t test_vcvt_n_s64_f64(float64x1_t a) {
  return vcvt_n_s64_f64(a, 64);
}

// CHECK-LABEL: define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
// CHECK: ret <1 x i64> [[VCVT_N1]]
uint64x1_t test_vcvt_n_u64_f64(float64x1_t a) {
  return vcvt_n_u64_f64(a, 64);
}

// CHECK-LABEL: define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
// CHECK: ret <1 x double> [[VCVT_N1]]
float64x1_t test_vcvt_n_f64_s64(int64x1_t a) {
  return vcvt_n_f64_s64(a, 64);
}

// CHECK-LABEL: define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
// CHECK: ret <1 x double> [[VCVT_N1]]
float64x1_t test_vcvt_n_f64_u64(uint64x1_t a) {
  return vcvt_n_f64_u64(a, 64);
}
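
// Rounding: vrndn keeps the target-specific frintn (round to nearest, ties
// to even); the remaining modes map onto the generic @llvm.round, @llvm.ceil,
// @llvm.floor, @llvm.rint, @llvm.trunc and @llvm.nearbyint intrinsics.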

// CHECK-LABEL: define <1 x double> @test_vrndn_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRNDN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VRNDN1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double> [[VRNDN_I]]) #4
// CHECK: ret <1 x double> [[VRNDN1_I]]
float64x1_t test_vrndn_f64(float64x1_t a) {
  return vrndn_f64(a);
}

// CHECK-LABEL: define <1 x double> @test_vrnda_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRNDA_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VRNDA1_I:%.*]] = call <1 x double> @llvm.round.v1f64(<1 x double> [[VRNDA_I]]) #4
// CHECK: ret <1 x double> [[VRNDA1_I]]
float64x1_t test_vrnda_f64(float64x1_t a) {
  return vrnda_f64(a);
}

// CHECK-LABEL: define <1 x double> @test_vrndp_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRNDP_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VRNDP1_I:%.*]] = call <1 x double> @llvm.ceil.v1f64(<1 x double> [[VRNDP_I]]) #4
// CHECK: ret <1 x double> [[VRNDP1_I]]
float64x1_t test_vrndp_f64(float64x1_t a) {
  return vrndp_f64(a);
}

// CHECK-LABEL: define <1 x double> @test_vrndm_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRNDM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VRNDM1_I:%.*]] = call <1 x double> @llvm.floor.v1f64(<1 x double> [[VRNDM_I]]) #4
// CHECK: ret <1 x double> [[VRNDM1_I]]
float64x1_t test_vrndm_f64(float64x1_t a) {
  return vrndm_f64(a);
}

// CHECK-LABEL: define <1 x double> @test_vrndx_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRNDX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VRNDX1_I:%.*]] = call <1 x double> @llvm.rint.v1f64(<1 x double> [[VRNDX_I]]) #4
// CHECK: ret <1 x double> [[VRNDX1_I]]
float64x1_t test_vrndx_f64(float64x1_t a) {
  return vrndx_f64(a);
}

// CHECK-LABEL: define <1 x double> @test_vrnd_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRNDZ_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VRNDZ1_I:%.*]] = call <1 x double> @llvm.trunc.v1f64(<1 x double> [[VRNDZ_I]]) #4
// CHECK: ret <1 x double> [[VRNDZ1_I]]
float64x1_t test_vrnd_f64(float64x1_t a) {
  return vrnd_f64(a);
}

// CHECK-LABEL: define <1 x double> @test_vrndi_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRNDI_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VRNDI1_I:%.*]] = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> [[VRNDI_I]]) #4
// CHECK: ret <1 x double> [[VRNDI1_I]]
float64x1_t test_vrndi_f64(float64x1_t a) {
  return vrndi_f64(a);
}
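
// Reciprocal and square root: the estimate (frecpe/frsqrte) and step
// (frecps/frsqrts) operations stay target-specific, while vsqrt_f64 lowers
// to the generic @llvm.sqrt.v1f64.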

// CHECK-LABEL: define <1 x double> @test_vrsqrte_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VRSQRTE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrte.v1f64(<1 x double> [[VRSQRTE_V_I]]) #4
// CHECK: ret <1 x double> [[VRSQRTE_V1_I]]
float64x1_t test_vrsqrte_f64(float64x1_t a) {
  return vrsqrte_f64(a);
}

// CHECK-LABEL: define <1 x double> @test_vrecpe_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRECPE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VRECPE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecpe.v1f64(<1 x double> [[VRECPE_V_I]]) #4
// CHECK: ret <1 x double> [[VRECPE_V1_I]]
float64x1_t test_vrecpe_f64(float64x1_t a) {
  return vrecpe_f64(a);
}

// CHECK-LABEL: define <1 x double> @test_vsqrt_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VSQRT_I:%.*]] = call <1 x double> @llvm.sqrt.v1f64(<1 x double> [[TMP1]]) #4
// CHECK: ret <1 x double> [[VSQRT_I]]
float64x1_t test_vsqrt_f64(float64x1_t a) {
  return vsqrt_f64(a);
}

// CHECK-LABEL: define <1 x double> @test_vrecps_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VRECPS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VRECPS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK: [[VRECPS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecps.v1f64(<1 x double> [[VRECPS_V_I]], <1 x double> [[VRECPS_V1_I]]) #4
// CHECK: [[VRECPS_V3_I:%.*]] = bitcast <1 x double> [[VRECPS_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRECPS_V3_I]] to <1 x double>
// CHECK: ret <1 x double> [[TMP2]]
float64x1_t test_vrecps_f64(float64x1_t a, float64x1_t b) {
  return vrecps_f64(a, b);
}

// CHECK-LABEL: define <1 x double> @test_vrsqrts_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VRSQRTS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VRSQRTS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK: [[VRSQRTS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrts.v1f64(<1 x double> [[VRSQRTS_V_I]], <1 x double> [[VRSQRTS_V1_I]]) #4
// CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <1 x double> [[VRSQRTS_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSQRTS_V3_I]] to <1 x double>
// CHECK: ret <1 x double> [[TMP2]]
float64x1_t test_vrsqrts_f64(float64x1_t a, float64x1_t b) {
  return vrsqrts_f64(a, b);
}
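
// Integer across-vector reductions on <2 x i32>, ending with the widening
// vaddlv variants that accumulate into an i64 result.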

// CHECK-LABEL: define i32 @test_vminv_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMINV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> [[TMP1]]) #4
// CHECK: ret i32 [[VMINV_S32_I]]
int32_t test_vminv_s32(int32x2_t a) {
  return vminv_s32(a);
}

// CHECK-LABEL: define i32 @test_vminv_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMINV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> [[TMP1]]) #4
// CHECK: ret i32 [[VMINV_U32_I]]
uint32_t test_vminv_u32(uint32x2_t a) {
  return vminv_u32(a);
}

// CHECK-LABEL: define i32 @test_vmaxv_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMAXV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> [[TMP1]]) #4
// CHECK: ret i32 [[VMAXV_S32_I]]
int32_t test_vmaxv_s32(int32x2_t a) {
  return vmaxv_s32(a);
}

// CHECK-LABEL: define i32 @test_vmaxv_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMAXV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> [[TMP1]]) #4
// CHECK: ret i32 [[VMAXV_U32_I]]
uint32_t test_vmaxv_u32(uint32x2_t a) {
  return vmaxv_u32(a);
}

// CHECK-LABEL: define i32 @test_vaddv_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VADDV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> [[TMP1]]) #4
// CHECK: ret i32 [[VADDV_S32_I]]
int32_t test_vaddv_s32(int32x2_t a) {
  return vaddv_s32(a);
}

// CHECK-LABEL: define i32 @test_vaddv_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VADDV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> [[TMP1]]) #4
// CHECK: ret i32 [[VADDV_U32_I]]
uint32_t test_vaddv_u32(uint32x2_t a) {
  return vaddv_u32(a);
}

// CHECK-LABEL: define i64 @test_vaddlv_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VADDLV_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> [[TMP1]]) #4
// CHECK: ret i64 [[VADDLV_S32_I]]
int64_t test_vaddlv_s32(int32x2_t a) {
  return vaddlv_s32(a);
}

// CHECK-LABEL: define i64 @test_vaddlv_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VADDLV_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> [[TMP1]]) #4
// CHECK: ret i64 [[VADDLV_U32_I]]
uint64_t test_vaddlv_u32(uint32x2_t a) {
  return vaddlv_u32(a);
}