1 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ 2 // RUN: -fallow-half-arguments-and-returns -emit-llvm -o - %s \ 3 // RUN: | opt -S -mem2reg | FileCheck %s 4 5 #include <arm_neon.h> 6 7 // CHECK-LABEL: define <16 x i8> @test_vld1q_dup_u8(i8* %a) #0 { 8 // CHECK: [[TMP0:%.*]] = load i8, i8* %a 9 // CHECK: [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0 10 // CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer 11 // CHECK: ret <16 x i8> [[LANE]] 12 uint8x16_t test_vld1q_dup_u8(uint8_t *a) { 13 return vld1q_dup_u8(a); 14 } 15 16 // CHECK-LABEL: define <8 x i16> @test_vld1q_dup_u16(i16* %a) #0 { 17 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 18 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16* 19 // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]] 20 // CHECK: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0 21 // CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer 22 // CHECK: ret <8 x i16> [[LANE]] 23 uint16x8_t test_vld1q_dup_u16(uint16_t *a) { 24 return vld1q_dup_u16(a); 25 } 26 27 // CHECK-LABEL: define <4 x i32> @test_vld1q_dup_u32(i32* %a) #0 { 28 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* 29 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32* 30 // CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]] 31 // CHECK: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0 32 // CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer 33 // CHECK: ret <4 x i32> [[LANE]] 34 uint32x4_t test_vld1q_dup_u32(uint32_t *a) { 35 return vld1q_dup_u32(a); 36 } 37 38 // CHECK-LABEL: define <2 x i64> @test_vld1q_dup_u64(i64* %a) #0 { 39 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 40 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64* 41 // CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]] 42 // CHECK: [[TMP3:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i32 0 43 // CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer 44 // CHECK: ret <2 x i64> [[LANE]] 45 uint64x2_t test_vld1q_dup_u64(uint64_t *a) { 46 return vld1q_dup_u64(a); 47 } 48 49 // CHECK-LABEL: define <16 x i8> @test_vld1q_dup_s8(i8* %a) #0 { 50 // CHECK: [[TMP0:%.*]] = load i8, i8* %a 51 // CHECK: [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0 52 // CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer 53 // CHECK: ret <16 x i8> [[LANE]] 54 int8x16_t test_vld1q_dup_s8(int8_t *a) { 55 return vld1q_dup_s8(a); 56 } 57 58 // CHECK-LABEL: define <8 x i16> @test_vld1q_dup_s16(i16* %a) #0 { 59 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 60 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16* 61 // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]] 62 // CHECK: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0 63 // CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer 64 // CHECK: ret <8 x i16> [[LANE]] 65 int16x8_t test_vld1q_dup_s16(int16_t *a) { 66 return vld1q_dup_s16(a); 67 } 68 69 // CHECK-LABEL: define <4 x i32> @test_vld1q_dup_s32(i32* %a) #0 { 70 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* 71 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32* 72 // CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]] 73 // CHECK: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0 74 // CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer 75 // CHECK: ret <4 x i32> [[LANE]] 76 int32x4_t test_vld1q_dup_s32(int32_t *a) { 77 return vld1q_dup_s32(a); 78 } 79 80 // CHECK-LABEL: define <2 x i64> @test_vld1q_dup_s64(i64* %a) #0 { 81 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 82 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64* 83 // CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]] 84 // CHECK: [[TMP3:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i32 0 85 // CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer 86 // CHECK: ret <2 x i64> [[LANE]] 87 int64x2_t test_vld1q_dup_s64(int64_t *a) { 88 return vld1q_dup_s64(a); 89 } 90 91 // CHECK-LABEL: define <8 x half> @test_vld1q_dup_f16(half* %a) #0 { 92 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* 93 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16* 94 // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]] 95 // CHECK: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0 96 // CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer 97 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[LANE]] to <8 x half> 98 // CHECK: ret <8 x half> [[TMP4]] 99 float16x8_t test_vld1q_dup_f16(float16_t *a) { 100 return vld1q_dup_f16(a); 101 } 102 103 // CHECK-LABEL: define <4 x float> @test_vld1q_dup_f32(float* %a) #0 { 104 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* 105 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to float* 106 // CHECK: [[TMP2:%.*]] = load float, float* [[TMP1]] 107 // CHECK: [[TMP3:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0 108 // CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP3]], <4 x i32> zeroinitializer 109 // CHECK: ret <4 x float> [[LANE]] 110 float32x4_t test_vld1q_dup_f32(float32_t *a) { 111 return vld1q_dup_f32(a); 112 } 113 114 // CHECK-LABEL: define <2 x double> @test_vld1q_dup_f64(double* %a) #0 { 115 // CHECK: [[TMP0:%.*]] = bitcast double* %a to i8* 116 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to double* 117 // CHECK: [[TMP2:%.*]] = load double, double* [[TMP1]] 118 // CHECK: [[TMP3:%.*]] = insertelement <2 x double> undef, double [[TMP2]], i32 0 119 // CHECK: [[LANE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP3]], <2 x i32> zeroinitializer 120 // CHECK: ret <2 x double> [[LANE]] 121 float64x2_t test_vld1q_dup_f64(float64_t *a) { 122 return vld1q_dup_f64(a); 123 } 124 125 // CHECK-LABEL: define <16 x i8> @test_vld1q_dup_p8(i8* %a) #0 { 126 // CHECK: [[TMP0:%.*]] = load i8, i8* %a 127 // CHECK: [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0 128 // CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer 129 // CHECK: ret <16 x i8> [[LANE]] 130 poly8x16_t test_vld1q_dup_p8(poly8_t *a) { 131 return vld1q_dup_p8(a); 132 } 133 134 // CHECK-LABEL: define <8 x i16> @test_vld1q_dup_p16(i16* %a) #0 { 135 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 136 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16* 137 // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]] 138 // CHECK: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0 139 // CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer 140 // CHECK: ret <8 x i16> [[LANE]] 141 poly16x8_t test_vld1q_dup_p16(poly16_t *a) { 142 return vld1q_dup_p16(a); 143 } 144 145 // CHECK-LABEL: define <2 x i64> @test_vld1q_dup_p64(i64* %a) #0 { 146 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 147 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64* 148 // CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]] 149 // CHECK: [[TMP3:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i32 0 150 // CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer 151 // CHECK: ret <2 x i64> [[LANE]] 152 poly64x2_t test_vld1q_dup_p64(poly64_t *a) { 153 return vld1q_dup_p64(a); 154 } 155 156 // CHECK-LABEL: define <8 x i8> @test_vld1_dup_u8(i8* %a) #0 { 157 // CHECK: [[TMP0:%.*]] = load i8, i8* %a 158 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0 159 // CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer 160 // CHECK: ret <8 x i8> [[LANE]] 161 uint8x8_t test_vld1_dup_u8(uint8_t *a) { 162 return vld1_dup_u8(a); 163 } 164 165 // CHECK-LABEL: define <4 x i16> @test_vld1_dup_u16(i16* %a) #0 { 166 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 167 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16* 168 // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]] 169 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0 170 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer 171 // CHECK: ret <4 x i16> [[LANE]] 172 uint16x4_t test_vld1_dup_u16(uint16_t *a) { 173 return vld1_dup_u16(a); 174 } 175 176 // CHECK-LABEL: define <2 x i32> @test_vld1_dup_u32(i32* %a) #0 { 177 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* 178 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32* 179 // CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]] 180 // CHECK: [[TMP3:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0 181 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP3]], <2 x i32> zeroinitializer 182 // CHECK: ret <2 x i32> [[LANE]] 183 uint32x2_t test_vld1_dup_u32(uint32_t *a) { 184 return vld1_dup_u32(a); 185 } 186 187 // CHECK-LABEL: define <1 x i64> @test_vld1_dup_u64(i64* %a) #0 { 188 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 189 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64* 190 // CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]] 191 // CHECK: [[TMP3:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0 192 // CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer 193 // CHECK: ret <1 x i64> [[LANE]] 194 uint64x1_t test_vld1_dup_u64(uint64_t *a) { 195 return vld1_dup_u64(a); 196 } 197 198 // CHECK-LABEL: define <8 x i8> @test_vld1_dup_s8(i8* %a) #0 { 199 // CHECK: [[TMP0:%.*]] = load i8, i8* %a 200 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0 201 // CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer 202 // CHECK: ret <8 x i8> [[LANE]] 203 int8x8_t test_vld1_dup_s8(int8_t *a) { 204 return vld1_dup_s8(a); 205 } 206 207 // CHECK-LABEL: define <4 x i16> @test_vld1_dup_s16(i16* %a) #0 { 208 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 209 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16* 210 // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]] 211 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0 212 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer 213 // CHECK: ret <4 x i16> [[LANE]] 214 int16x4_t test_vld1_dup_s16(int16_t *a) { 215 return vld1_dup_s16(a); 216 } 217 218 // CHECK-LABEL: define <2 x i32> @test_vld1_dup_s32(i32* %a) #0 { 219 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* 220 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32* 221 // CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]] 222 // CHECK: [[TMP3:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0 223 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP3]], <2 x i32> zeroinitializer 224 // CHECK: ret <2 x i32> [[LANE]] 225 int32x2_t test_vld1_dup_s32(int32_t *a) { 226 return vld1_dup_s32(a); 227 } 228 229 // CHECK-LABEL: define <1 x i64> @test_vld1_dup_s64(i64* %a) #0 { 230 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 231 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64* 232 // CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]] 233 // CHECK: [[TMP3:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0 234 // CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer 235 // CHECK: ret <1 x i64> [[LANE]] 236 int64x1_t test_vld1_dup_s64(int64_t *a) { 237 return vld1_dup_s64(a); 238 } 239 240 // CHECK-LABEL: define <4 x half> @test_vld1_dup_f16(half* %a) #0 { 241 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* 242 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16* 243 // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]] 244 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0 245 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer 246 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <4 x half> 247 // CHECK: ret <4 x half> [[TMP4]] 248 float16x4_t test_vld1_dup_f16(float16_t *a) { 249 return vld1_dup_f16(a); 250 } 251 252 // CHECK-LABEL: define <2 x float> @test_vld1_dup_f32(float* %a) #0 { 253 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* 254 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to float* 255 // CHECK: [[TMP2:%.*]] = load float, float* [[TMP1]] 256 // CHECK: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i32 0 257 // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> zeroinitializer 258 // CHECK: ret <2 x float> [[LANE]] 259 float32x2_t test_vld1_dup_f32(float32_t *a) { 260 return vld1_dup_f32(a); 261 } 262 263 // CHECK-LABEL: define <1 x double> @test_vld1_dup_f64(double* %a) #0 { 264 // CHECK: [[TMP0:%.*]] = bitcast double* %a to i8* 265 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to double* 266 // CHECK: [[TMP2:%.*]] = load double, double* [[TMP1]] 267 // CHECK: [[TMP3:%.*]] = insertelement <1 x double> undef, double [[TMP2]], i32 0 268 // CHECK: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer 269 // CHECK: ret <1 x double> [[LANE]] 270 float64x1_t test_vld1_dup_f64(float64_t *a) { 271 return vld1_dup_f64(a); 272 } 273 274 // CHECK-LABEL: define <8 x i8> @test_vld1_dup_p8(i8* %a) #0 { 275 // CHECK: [[TMP0:%.*]] = load i8, i8* %a 276 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0 277 // CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer 278 // CHECK: ret <8 x i8> [[LANE]] 279 poly8x8_t test_vld1_dup_p8(poly8_t *a) { 280 return vld1_dup_p8(a); 281 } 282 283 // CHECK-LABEL: define <4 x i16> @test_vld1_dup_p16(i16* %a) #0 { 284 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 285 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16* 286 // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]] 287 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0 288 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer 289 // CHECK: ret <4 x i16> [[LANE]] 290 poly16x4_t test_vld1_dup_p16(poly16_t *a) { 291 return vld1_dup_p16(a); 292 } 293 294 // CHECK-LABEL: define <1 x i64> @test_vld1_dup_p64(i64* %a) #0 { 295 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 296 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64* 297 // CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]] 298 // CHECK: [[TMP3:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0 299 // CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer 300 // CHECK: ret <1 x i64> [[LANE]] 301 poly64x1_t test_vld1_dup_p64(poly64_t *a) { 302 return vld1_dup_p64(a); 303 } 304 305 // CHECK-LABEL: define %struct.uint8x16x2_t @test_vld2q_dup_u8(i8* %a) #0 { 306 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16 307 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16 308 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8* 309 // CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %a) 310 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }* 311 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP1]] 312 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8* 313 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8* 314 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false) 315 // CHECK: [[TMP4:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16 316 // CHECK: ret %struct.uint8x16x2_t [[TMP4]] 317 uint8x16x2_t test_vld2q_dup_u8(uint8_t *a) { 318 return vld2q_dup_u8(a); 319 } 320 321 // CHECK-LABEL: define %struct.uint16x8x2_t @test_vld2q_dup_u16(i16* %a) #0 { 322 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16 323 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16 324 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* 325 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 326 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 327 // CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* [[TMP2]]) 328 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }* 329 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]] 330 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8* 331 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* 332 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) 333 // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16 334 // CHECK: ret %struct.uint16x8x2_t [[TMP6]] 335 uint16x8x2_t test_vld2q_dup_u16(uint16_t *a) { 336 return vld2q_dup_u16(a); 337 } 338 339 // CHECK-LABEL: define %struct.uint32x4x2_t @test_vld2q_dup_u32(i32* %a) #0 { 340 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16 341 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16 342 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* 343 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 344 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* 345 // CHECK: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* [[TMP2]]) 346 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }* 347 // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]] 348 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8* 349 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* 350 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) 351 // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16 352 // CHECK: ret %struct.uint32x4x2_t [[TMP6]] 353 uint32x4x2_t test_vld2q_dup_u32(uint32_t *a) { 354 return vld2q_dup_u32(a); 355 } 356 357 // CHECK-LABEL: define %struct.uint64x2x2_t @test_vld2q_dup_u64(i64* %a) #0 { 358 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16 359 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16 360 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8* 361 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 362 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 363 // CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* [[TMP2]]) 364 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }* 365 // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]] 366 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8* 367 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8* 368 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) 369 // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16 370 // CHECK: ret %struct.uint64x2x2_t [[TMP6]] 371 uint64x2x2_t test_vld2q_dup_u64(uint64_t *a) { 372 return vld2q_dup_u64(a); 373 } 374 375 // CHECK-LABEL: define %struct.int8x16x2_t @test_vld2q_dup_s8(i8* %a) #0 { 376 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16 377 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16 378 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8* 379 // CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %a) 380 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }* 381 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP1]] 382 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8* 383 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8* 384 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false) 385 // CHECK: [[TMP4:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16 386 // CHECK: ret %struct.int8x16x2_t [[TMP4]] 387 int8x16x2_t test_vld2q_dup_s8(int8_t *a) { 388 return vld2q_dup_s8(a); 389 } 390 391 // CHECK-LABEL: define %struct.int16x8x2_t @test_vld2q_dup_s16(i16* %a) #0 { 392 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16 393 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16 394 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* 395 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 396 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 397 // CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* [[TMP2]]) 398 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }* 399 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]] 400 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8* 401 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* 402 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) 403 // CHECK: [[TMP6:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16 404 // CHECK: ret %struct.int16x8x2_t [[TMP6]] 405 int16x8x2_t test_vld2q_dup_s16(int16_t *a) { 406 return vld2q_dup_s16(a); 407 } 408 409 // CHECK-LABEL: define %struct.int32x4x2_t @test_vld2q_dup_s32(i32* %a) #0 { 410 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16 411 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16 412 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8* 413 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 414 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* 415 // CHECK: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* [[TMP2]]) 416 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }* 417 // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], { <4 x i32>, <4 x i32> }* [[TMP3]] 418 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8* 419 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8* 420 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) 421 // CHECK: [[TMP6:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16 422 // CHECK: ret %struct.int32x4x2_t [[TMP6]] 423 int32x4x2_t test_vld2q_dup_s32(int32_t *a) { 424 return vld2q_dup_s32(a); 425 } 426 427 // CHECK-LABEL: define %struct.int64x2x2_t @test_vld2q_dup_s64(i64* %a) #0 { 428 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16 429 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16 430 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8* 431 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 432 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 433 // CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* [[TMP2]]) 434 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }* 435 // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]] 436 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8* 437 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8* 438 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) 439 // CHECK: [[TMP6:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16 440 // CHECK: ret %struct.int64x2x2_t [[TMP6]] 441 int64x2x2_t test_vld2q_dup_s64(int64_t *a) { 442 return vld2q_dup_s64(a); 443 } 444 445 // CHECK-LABEL: define %struct.float16x8x2_t @test_vld2q_dup_f16(half* %a) #0 { 446 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16 447 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16 448 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8* 449 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* 450 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 451 // CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* [[TMP2]]) 452 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }* 453 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]] 454 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8* 455 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8* 456 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) 457 // CHECK: [[TMP6:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16 458 // CHECK: ret %struct.float16x8x2_t [[TMP6]] 459 float16x8x2_t test_vld2q_dup_f16(float16_t *a) { 460 return vld2q_dup_f16(a); 461 } 462 463 // CHECK-LABEL: define %struct.float32x4x2_t @test_vld2q_dup_f32(float* %a) #0 { 464 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16 465 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16 466 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8* 467 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* 468 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* 469 // CHECK: [[VLD2:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float* [[TMP2]]) 470 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float> }* 471 // CHECK: store { <4 x float>, <4 x float> } [[VLD2]], { <4 x float>, <4 x float> }* [[TMP3]] 472 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8* 473 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8* 474 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) 475 // CHECK: [[TMP6:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16 476 // CHECK: ret %struct.float32x4x2_t [[TMP6]] 477 float32x4x2_t test_vld2q_dup_f32(float32_t *a) { 478 return vld2q_dup_f32(a); 479 } 480 481 // CHECK-LABEL: define %struct.float64x2x2_t @test_vld2q_dup_f64(double* %a) #0 { 482 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16 483 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16 484 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8* 485 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8* 486 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double* 487 // CHECK: [[VLD2:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double* [[TMP2]]) 488 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double> }* 489 // CHECK: store { <2 x double>, <2 x double> } [[VLD2]], { <2 x double>, <2 x double> }* [[TMP3]] 490 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8* 491 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8* 492 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) 493 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16 494 // CHECK: ret %struct.float64x2x2_t [[TMP6]] 495 float64x2x2_t test_vld2q_dup_f64(float64_t *a) { 496 return vld2q_dup_f64(a); 497 } 498 499 // CHECK-LABEL: define %struct.poly8x16x2_t @test_vld2q_dup_p8(i8* %a) #0 { 500 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16 501 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16 502 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8* 503 // CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %a) 504 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }* 505 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], { <16 x i8>, <16 x i8> }* [[TMP1]] 506 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8* 507 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8* 508 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 16, i1 false) 509 // CHECK: [[TMP4:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16 510 // CHECK: ret %struct.poly8x16x2_t [[TMP4]] 511 poly8x16x2_t test_vld2q_dup_p8(poly8_t *a) { 512 return vld2q_dup_p8(a); 513 } 514 515 // CHECK-LABEL: define %struct.poly16x8x2_t @test_vld2q_dup_p16(i16* %a) #0 { 516 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16 517 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16 518 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8* 519 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 520 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 521 // CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* [[TMP2]]) 522 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }* 523 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], { <8 x i16>, <8 x i16> }* [[TMP3]] 524 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8* 525 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8* 526 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) 527 // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16 528 // CHECK: ret %struct.poly16x8x2_t [[TMP6]] 529 poly16x8x2_t test_vld2q_dup_p16(poly16_t *a) { 530 return vld2q_dup_p16(a); 531 } 532 533 // CHECK-LABEL: define %struct.poly64x2x2_t @test_vld2q_dup_p64(i64* %a) #0 { 534 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16 535 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16 536 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8* 537 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 538 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 539 // CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* [[TMP2]]) 540 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }* 541 // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]] 542 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8* 543 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8* 544 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false) 545 // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16 546 // CHECK: ret %struct.poly64x2x2_t [[TMP6]] 547 poly64x2x2_t test_vld2q_dup_p64(poly64_t *a) { 548 return vld2q_dup_p64(a); 549 } 550 551 // CHECK-LABEL: define %struct.uint8x8x2_t @test_vld2_dup_u8(i8* %a) #0 { 552 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8 553 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8 554 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8* 555 // CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %a) 556 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }* 557 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP1]] 558 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8* 559 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8* 560 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false) 561 // CHECK: [[TMP4:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8 562 // CHECK: ret %struct.uint8x8x2_t [[TMP4]] 563 uint8x8x2_t test_vld2_dup_u8(uint8_t *a) { 564 return vld2_dup_u8(a); 565 } 566 567 // CHECK-LABEL: define %struct.uint16x4x2_t @test_vld2_dup_u16(i16* %a) #0 { 568 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8 569 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8 570 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8* 571 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 572 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 573 // CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* [[TMP2]]) 574 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }* 575 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]] 576 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8* 577 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8* 578 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) 579 // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8 580 // CHECK: ret %struct.uint16x4x2_t [[TMP6]] 581 uint16x4x2_t test_vld2_dup_u16(uint16_t *a) { 582 return vld2_dup_u16(a); 583 } 584 585 // CHECK-LABEL: define %struct.uint32x2x2_t @test_vld2_dup_u32(i32* %a) #0 { 586 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8 587 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8 588 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8* 589 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 590 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* 591 // CHECK: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* [[TMP2]]) 592 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }* 593 // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]] 594 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8* 595 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8* 596 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) 597 // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8 598 // CHECK: ret %struct.uint32x2x2_t [[TMP6]] 599 uint32x2x2_t test_vld2_dup_u32(uint32_t *a) { 600 return vld2_dup_u32(a); 601 } 602 603 // CHECK-LABEL: define %struct.uint64x1x2_t @test_vld2_dup_u64(i64* %a) #0 { 604 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8 605 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8 606 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8* 607 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 608 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 609 // CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* [[TMP2]]) 610 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }* 611 // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]] 612 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8* 613 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8* 614 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) 615 // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8 616 // CHECK: ret %struct.uint64x1x2_t [[TMP6]] 617 uint64x1x2_t test_vld2_dup_u64(uint64_t *a) { 618 return vld2_dup_u64(a); 619 } 620 621 // CHECK-LABEL: define %struct.int8x8x2_t @test_vld2_dup_s8(i8* %a) #0 { 622 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8 623 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8 624 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8* 625 // CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %a) 626 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }* 627 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP1]] 628 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8* 629 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8* 630 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false) 631 // CHECK: [[TMP4:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8 632 // CHECK: ret %struct.int8x8x2_t [[TMP4]] 633 int8x8x2_t test_vld2_dup_s8(int8_t *a) { 634 return vld2_dup_s8(a); 635 } 636 637 // CHECK-LABEL: define %struct.int16x4x2_t @test_vld2_dup_s16(i16* %a) #0 { 638 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8 639 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8 640 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8* 641 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 642 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 643 // CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* [[TMP2]]) 644 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }* 645 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]] 646 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8* 647 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8* 648 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) 649 // CHECK: [[TMP6:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8 650 // CHECK: ret %struct.int16x4x2_t [[TMP6]] 651 int16x4x2_t test_vld2_dup_s16(int16_t *a) { 652 return vld2_dup_s16(a); 653 } 654 655 // CHECK-LABEL: define %struct.int32x2x2_t @test_vld2_dup_s32(i32* %a) #0 { 656 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8 657 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8 658 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8* 659 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 660 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* 661 // CHECK: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* [[TMP2]]) 662 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }* 663 // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], { <2 x i32>, <2 x i32> }* [[TMP3]] 664 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8* 665 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8* 666 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) 667 // CHECK: [[TMP6:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8 668 // CHECK: ret %struct.int32x2x2_t [[TMP6]] 669 int32x2x2_t test_vld2_dup_s32(int32_t *a) { 670 return vld2_dup_s32(a); 671 } 672 673 // CHECK-LABEL: define %struct.int64x1x2_t @test_vld2_dup_s64(i64* %a) #0 { 674 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8 675 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8 676 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8* 677 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 678 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 679 // CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* [[TMP2]]) 680 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }* 681 // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]] 682 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8* 683 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8* 684 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) 685 // CHECK: [[TMP6:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8 686 // CHECK: ret %struct.int64x1x2_t [[TMP6]] 687 int64x1x2_t test_vld2_dup_s64(int64_t *a) { 688 return vld2_dup_s64(a); 689 } 690 691 // CHECK-LABEL: define %struct.float16x4x2_t @test_vld2_dup_f16(half* %a) #0 { 692 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8 693 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8 694 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8* 695 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* 696 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 697 // CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* [[TMP2]]) 698 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }* 699 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]] 700 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8* 701 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8* 702 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) 703 // CHECK: [[TMP6:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8 704 // CHECK: ret %struct.float16x4x2_t [[TMP6]] 705 float16x4x2_t test_vld2_dup_f16(float16_t *a) { 706 return vld2_dup_f16(a); 707 } 708 709 // CHECK-LABEL: define %struct.float32x2x2_t @test_vld2_dup_f32(float* %a) #0 { 710 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8 711 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8 712 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* 713 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* 714 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* 715 // CHECK: [[VLD2:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* [[TMP2]]) 716 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }* 717 // CHECK: store { <2 x float>, <2 x float> } [[VLD2]], { <2 x float>, <2 x float> }* [[TMP3]] 718 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8* 719 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* 720 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) 721 // CHECK: [[TMP6:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8 722 // CHECK: ret %struct.float32x2x2_t [[TMP6]] 723 float32x2x2_t test_vld2_dup_f32(float32_t *a) { 724 return vld2_dup_f32(a); 725 } 726 727 // CHECK-LABEL: define %struct.float64x1x2_t @test_vld2_dup_f64(double* %a) #0 { 728 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8 729 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8 730 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8* 731 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8* 732 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double* 733 // CHECK: [[VLD2:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double* [[TMP2]]) 734 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double> }* 735 // CHECK: store { <1 x double>, <1 x double> } [[VLD2]], { <1 x double>, <1 x double> }* [[TMP3]] 736 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8* 737 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8* 738 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) 739 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8 740 // CHECK: ret %struct.float64x1x2_t [[TMP6]] 741 float64x1x2_t test_vld2_dup_f64(float64_t *a) { 742 return vld2_dup_f64(a); 743 } 744 745 // CHECK-LABEL: define %struct.poly8x8x2_t @test_vld2_dup_p8(i8* %a) #0 { 746 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8 747 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8 748 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* 749 // CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %a) 750 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }* 751 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], { <8 x i8>, <8 x i8> }* [[TMP1]] 752 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8* 753 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* 754 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false) 755 // CHECK: [[TMP4:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8 756 // CHECK: ret %struct.poly8x8x2_t [[TMP4]] 757 poly8x8x2_t test_vld2_dup_p8(poly8_t *a) { 758 return vld2_dup_p8(a); 759 } 760 761 // CHECK-LABEL: define %struct.poly16x4x2_t @test_vld2_dup_p16(i16* %a) #0 { 762 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8 763 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8 764 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* 765 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 766 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 767 // CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* [[TMP2]]) 768 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }* 769 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], { <4 x i16>, <4 x i16> }* [[TMP3]] 770 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8* 771 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* 772 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) 773 // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8 774 // CHECK: ret %struct.poly16x4x2_t [[TMP6]] 775 poly16x4x2_t test_vld2_dup_p16(poly16_t *a) { 776 return vld2_dup_p16(a); 777 } 778 779 // CHECK-LABEL: define %struct.poly64x1x2_t @test_vld2_dup_p64(i64* %a) #0 { 780 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8 781 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8 782 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8* 783 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 784 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 785 // CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* [[TMP2]]) 786 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }* 787 // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]] 788 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8* 789 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8* 790 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false) 791 // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8 792 // CHECK: ret %struct.poly64x1x2_t [[TMP6]] 793 poly64x1x2_t test_vld2_dup_p64(poly64_t *a) { 794 return vld2_dup_p64(a); 795 } 796 797 // CHECK-LABEL: define %struct.uint8x16x3_t @test_vld3q_dup_u8(i8* %a) #0 { 798 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16 799 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16 800 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8* 801 // CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %a) 802 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }* 803 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] 804 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8* 805 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8* 806 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false) 807 // CHECK: [[TMP4:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16 808 // CHECK: ret %struct.uint8x16x3_t [[TMP4]] 809 uint8x16x3_t test_vld3q_dup_u8(uint8_t *a) { 810 return vld3q_dup_u8(a); 811 // [{{x[0-9]+|sp}}] 812 } 813 814 // CHECK-LABEL: define %struct.uint16x8x3_t @test_vld3q_dup_u16(i16* %a) #0 { 815 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16 816 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16 817 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* 818 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 819 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 820 // CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* [[TMP2]]) 821 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }* 822 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] 823 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8* 824 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* 825 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) 826 // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16 827 // CHECK: ret %struct.uint16x8x3_t [[TMP6]] 828 uint16x8x3_t test_vld3q_dup_u16(uint16_t *a) { 829 return vld3q_dup_u16(a); 830 // [{{x[0-9]+|sp}}] 831 } 832 833 // CHECK-LABEL: define %struct.uint32x4x3_t @test_vld3q_dup_u32(i32* %a) #0 { 834 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16 835 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16 836 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* 837 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 838 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* 839 // CHECK: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* [[TMP2]]) 840 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }* 841 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] 842 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8* 843 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* 844 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) 845 // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16 846 // CHECK: ret %struct.uint32x4x3_t [[TMP6]] 847 uint32x4x3_t test_vld3q_dup_u32(uint32_t *a) { 848 return vld3q_dup_u32(a); 849 // [{{x[0-9]+|sp}}] 850 } 851 852 // CHECK-LABEL: define %struct.uint64x2x3_t @test_vld3q_dup_u64(i64* %a) #0 { 853 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16 854 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16 855 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8* 856 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 857 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 858 // CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* [[TMP2]]) 859 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }* 860 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] 861 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8* 862 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8* 863 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) 864 // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16 865 // CHECK: ret %struct.uint64x2x3_t [[TMP6]] 866 uint64x2x3_t test_vld3q_dup_u64(uint64_t *a) { 867 return vld3q_dup_u64(a); 868 // [{{x[0-9]+|sp}}] 869 } 870 871 // CHECK-LABEL: define %struct.int8x16x3_t @test_vld3q_dup_s8(i8* %a) #0 { 872 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16 873 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16 874 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8* 875 // CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %a) 876 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }* 877 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] 878 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8* 879 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8* 880 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false) 881 // CHECK: [[TMP4:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16 882 // CHECK: ret %struct.int8x16x3_t [[TMP4]] 883 int8x16x3_t test_vld3q_dup_s8(int8_t *a) { 884 return vld3q_dup_s8(a); 885 // [{{x[0-9]+|sp}}] 886 } 887 888 // CHECK-LABEL: define %struct.int16x8x3_t @test_vld3q_dup_s16(i16* %a) #0 { 889 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16 890 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16 891 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8* 892 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 893 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 894 // CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* [[TMP2]]) 895 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }* 896 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] 897 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8* 898 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8* 899 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) 900 // CHECK: [[TMP6:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16 901 // CHECK: ret %struct.int16x8x3_t [[TMP6]] 902 int16x8x3_t test_vld3q_dup_s16(int16_t *a) { 903 return vld3q_dup_s16(a); 904 // [{{x[0-9]+|sp}}] 905 } 906 907 // CHECK-LABEL: define %struct.int32x4x3_t @test_vld3q_dup_s32(i32* %a) #0 { 908 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16 909 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16 910 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* 911 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 912 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* 913 // CHECK: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* [[TMP2]]) 914 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }* 915 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] 916 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8* 917 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* 918 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) 919 // CHECK: [[TMP6:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16 920 // CHECK: ret %struct.int32x4x3_t [[TMP6]] 921 int32x4x3_t test_vld3q_dup_s32(int32_t *a) { 922 return vld3q_dup_s32(a); 923 // [{{x[0-9]+|sp}}] 924 } 925 926 // CHECK-LABEL: define %struct.int64x2x3_t @test_vld3q_dup_s64(i64* %a) #0 { 927 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16 928 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16 929 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8* 930 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 931 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 932 // CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* [[TMP2]]) 933 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }* 934 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] 935 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8* 936 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8* 937 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) 938 // CHECK: [[TMP6:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16 939 // CHECK: ret %struct.int64x2x3_t [[TMP6]] 940 int64x2x3_t test_vld3q_dup_s64(int64_t *a) { 941 return vld3q_dup_s64(a); 942 // [{{x[0-9]+|sp}}] 943 } 944 945 // CHECK-LABEL: define %struct.float16x8x3_t @test_vld3q_dup_f16(half* %a) #0 { 946 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16 947 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16 948 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* 949 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* 950 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 951 // CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* [[TMP2]]) 952 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }* 953 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] 954 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8* 955 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* 956 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) 957 // CHECK: [[TMP6:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16 958 // CHECK: ret %struct.float16x8x3_t [[TMP6]] 959 float16x8x3_t test_vld3q_dup_f16(float16_t *a) { 960 return vld3q_dup_f16(a); 961 // [{{x[0-9]+|sp}}] 962 } 963 964 // CHECK-LABEL: define %struct.float32x4x3_t @test_vld3q_dup_f32(float* %a) #0 { 965 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16 966 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16 967 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* 968 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* 969 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* 970 // CHECK: [[VLD3:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float* [[TMP2]]) 971 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float> }* 972 // CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP3]] 973 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8* 974 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* 975 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) 976 // CHECK: [[TMP6:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16 977 // CHECK: ret %struct.float32x4x3_t [[TMP6]] 978 float32x4x3_t test_vld3q_dup_f32(float32_t *a) { 979 return vld3q_dup_f32(a); 980 // [{{x[0-9]+|sp}}] 981 } 982 983 // CHECK-LABEL: define %struct.float64x2x3_t @test_vld3q_dup_f64(double* %a) #0 { 984 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16 985 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16 986 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8* 987 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8* 988 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double* 989 // CHECK: [[VLD3:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double* [[TMP2]]) 990 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double> }* 991 // CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP3]] 992 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8* 993 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8* 994 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) 995 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16 996 // CHECK: ret %struct.float64x2x3_t [[TMP6]] 997 float64x2x3_t test_vld3q_dup_f64(float64_t *a) { 998 return vld3q_dup_f64(a); 999 // [{{x[0-9]+|sp}}] 1000 } 1001 1002 // CHECK-LABEL: define %struct.poly8x16x3_t @test_vld3q_dup_p8(i8* %a) #0 { 1003 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16 1004 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16 1005 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8* 1006 // CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %a) 1007 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }* 1008 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] 1009 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8* 1010 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8* 1011 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 48, i32 16, i1 false) 1012 // CHECK: [[TMP4:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16 1013 // CHECK: ret %struct.poly8x16x3_t [[TMP4]] 1014 poly8x16x3_t test_vld3q_dup_p8(poly8_t *a) { 1015 return vld3q_dup_p8(a); 1016 // [{{x[0-9]+|sp}}] 1017 } 1018 1019 // CHECK-LABEL: define %struct.poly16x8x3_t @test_vld3q_dup_p16(i16* %a) #0 { 1020 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16 1021 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16 1022 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* 1023 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 1024 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 1025 // CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* [[TMP2]]) 1026 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }* 1027 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] 1028 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8* 1029 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* 1030 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) 1031 // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16 1032 // CHECK: ret %struct.poly16x8x3_t [[TMP6]] 1033 poly16x8x3_t test_vld3q_dup_p16(poly16_t *a) { 1034 return vld3q_dup_p16(a); 1035 // [{{x[0-9]+|sp}}] 1036 } 1037 1038 // CHECK-LABEL: define %struct.poly64x2x3_t @test_vld3q_dup_p64(i64* %a) #0 { 1039 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16 1040 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16 1041 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8* 1042 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 1043 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 1044 // CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* [[TMP2]]) 1045 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }* 1046 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] 1047 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8* 1048 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8* 1049 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false) 1050 // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16 1051 // CHECK: ret %struct.poly64x2x3_t [[TMP6]] 1052 poly64x2x3_t test_vld3q_dup_p64(poly64_t *a) { 1053 return vld3q_dup_p64(a); 1054 // [{{x[0-9]+|sp}}] 1055 } 1056 1057 // CHECK-LABEL: define %struct.uint8x8x3_t @test_vld3_dup_u8(i8* %a) #0 { 1058 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8 1059 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8 1060 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* 1061 // CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %a) 1062 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* 1063 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] 1064 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8* 1065 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* 1066 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false) 1067 // CHECK: [[TMP4:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8 1068 // CHECK: ret %struct.uint8x8x3_t [[TMP4]] 1069 uint8x8x3_t test_vld3_dup_u8(uint8_t *a) { 1070 return vld3_dup_u8(a); 1071 // [{{x[0-9]+|sp}}] 1072 } 1073 1074 // CHECK-LABEL: define %struct.uint16x4x3_t @test_vld3_dup_u16(i16* %a) #0 { 1075 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8 1076 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8 1077 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* 1078 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 1079 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 1080 // CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* [[TMP2]]) 1081 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* 1082 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 1083 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8* 1084 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* 1085 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 1086 // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8 1087 // CHECK: ret %struct.uint16x4x3_t [[TMP6]] 1088 uint16x4x3_t test_vld3_dup_u16(uint16_t *a) { 1089 return vld3_dup_u16(a); 1090 // [{{x[0-9]+|sp}}] 1091 } 1092 1093 // CHECK-LABEL: define %struct.uint32x2x3_t @test_vld3_dup_u32(i32* %a) #0 { 1094 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8 1095 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8 1096 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* 1097 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 1098 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* 1099 // CHECK: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* [[TMP2]]) 1100 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }* 1101 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] 1102 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8* 1103 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* 1104 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 1105 // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8 1106 // CHECK: ret %struct.uint32x2x3_t [[TMP6]] 1107 uint32x2x3_t test_vld3_dup_u32(uint32_t *a) { 1108 return vld3_dup_u32(a); 1109 // [{{x[0-9]+|sp}}] 1110 } 1111 1112 // CHECK-LABEL: define %struct.uint64x1x3_t @test_vld3_dup_u64(i64* %a) #0 { 1113 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8 1114 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8 1115 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* 1116 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 1117 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 1118 // CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* [[TMP2]]) 1119 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }* 1120 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] 1121 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8* 1122 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* 1123 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 1124 // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8 1125 // CHECK: ret %struct.uint64x1x3_t [[TMP6]] 1126 uint64x1x3_t test_vld3_dup_u64(uint64_t *a) { 1127 return vld3_dup_u64(a); 1128 // [{{x[0-9]+|sp}}] 1129 } 1130 1131 // CHECK-LABEL: define %struct.int8x8x3_t @test_vld3_dup_s8(i8* %a) #0 { 1132 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8 1133 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8 1134 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* 1135 // CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %a) 1136 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* 1137 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] 1138 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8* 1139 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* 1140 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false) 1141 // CHECK: [[TMP4:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8 1142 // CHECK: ret %struct.int8x8x3_t [[TMP4]] 1143 int8x8x3_t test_vld3_dup_s8(int8_t *a) { 1144 return vld3_dup_s8(a); 1145 // [{{x[0-9]+|sp}}] 1146 } 1147 1148 // CHECK-LABEL: define %struct.int16x4x3_t @test_vld3_dup_s16(i16* %a) #0 { 1149 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8 1150 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8 1151 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* 1152 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 1153 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 1154 // CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* [[TMP2]]) 1155 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* 1156 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 1157 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8* 1158 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* 1159 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 1160 // CHECK: [[TMP6:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8 1161 // CHECK: ret %struct.int16x4x3_t [[TMP6]] 1162 int16x4x3_t test_vld3_dup_s16(int16_t *a) { 1163 return vld3_dup_s16(a); 1164 // [{{x[0-9]+|sp}}] 1165 } 1166 1167 // CHECK-LABEL: define %struct.int32x2x3_t @test_vld3_dup_s32(i32* %a) #0 { 1168 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8 1169 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8 1170 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* 1171 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 1172 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* 1173 // CHECK: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* [[TMP2]]) 1174 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }* 1175 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] 1176 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8* 1177 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* 1178 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 1179 // CHECK: [[TMP6:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8 1180 // CHECK: ret %struct.int32x2x3_t [[TMP6]] 1181 int32x2x3_t test_vld3_dup_s32(int32_t *a) { 1182 return vld3_dup_s32(a); 1183 // [{{x[0-9]+|sp}}] 1184 } 1185 1186 // CHECK-LABEL: define %struct.int64x1x3_t @test_vld3_dup_s64(i64* %a) #0 { 1187 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8 1188 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8 1189 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8* 1190 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 1191 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 1192 // CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* [[TMP2]]) 1193 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }* 1194 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] 1195 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8* 1196 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8* 1197 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 1198 // CHECK: [[TMP6:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8 1199 // CHECK: ret %struct.int64x1x3_t [[TMP6]] 1200 int64x1x3_t test_vld3_dup_s64(int64_t *a) { 1201 return vld3_dup_s64(a); 1202 // [{{x[0-9]+|sp}}] 1203 } 1204 1205 // CHECK-LABEL: define %struct.float16x4x3_t @test_vld3_dup_f16(half* %a) #0 { 1206 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8 1207 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8 1208 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* 1209 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* 1210 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 1211 // CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* [[TMP2]]) 1212 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* 1213 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 1214 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8* 1215 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* 1216 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 1217 // CHECK: [[TMP6:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8 1218 // CHECK: ret %struct.float16x4x3_t [[TMP6]] 1219 float16x4x3_t test_vld3_dup_f16(float16_t *a) { 1220 return vld3_dup_f16(a); 1221 // [{{x[0-9]+|sp}}] 1222 } 1223 1224 // CHECK-LABEL: define %struct.float32x2x3_t @test_vld3_dup_f32(float* %a) #0 { 1225 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8 1226 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8 1227 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* 1228 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* 1229 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* 1230 // CHECK: [[VLD3:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* [[TMP2]]) 1231 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }* 1232 // CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP3]] 1233 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8* 1234 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* 1235 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 1236 // CHECK: [[TMP6:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8 1237 // CHECK: ret %struct.float32x2x3_t [[TMP6]] 1238 float32x2x3_t test_vld3_dup_f32(float32_t *a) { 1239 return vld3_dup_f32(a); 1240 // [{{x[0-9]+|sp}}] 1241 } 1242 1243 // CHECK-LABEL: define %struct.float64x1x3_t @test_vld3_dup_f64(double* %a) #0 { 1244 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8 1245 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8 1246 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8* 1247 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8* 1248 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double* 1249 // CHECK: [[VLD3:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double* [[TMP2]]) 1250 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double> }* 1251 // CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP3]] 1252 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8* 1253 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8* 1254 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 1255 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8 1256 // CHECK: ret %struct.float64x1x3_t [[TMP6]] 1257 float64x1x3_t test_vld3_dup_f64(float64_t *a) { 1258 return vld3_dup_f64(a); 1259 // [{{x[0-9]+|sp}}] 1260 } 1261 1262 // CHECK-LABEL: define %struct.poly8x8x3_t @test_vld3_dup_p8(i8* %a) #0 { 1263 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8 1264 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8 1265 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* 1266 // CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %a) 1267 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* 1268 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] 1269 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8* 1270 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* 1271 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 24, i32 8, i1 false) 1272 // CHECK: [[TMP4:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8 1273 // CHECK: ret %struct.poly8x8x3_t [[TMP4]] 1274 poly8x8x3_t test_vld3_dup_p8(poly8_t *a) { 1275 return vld3_dup_p8(a); 1276 // [{{x[0-9]+|sp}}] 1277 } 1278 1279 // CHECK-LABEL: define %struct.poly16x4x3_t @test_vld3_dup_p16(i16* %a) #0 { 1280 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8 1281 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8 1282 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* 1283 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 1284 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 1285 // CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* [[TMP2]]) 1286 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* 1287 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 1288 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8* 1289 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* 1290 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 1291 // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8 1292 // CHECK: ret %struct.poly16x4x3_t [[TMP6]] 1293 poly16x4x3_t test_vld3_dup_p16(poly16_t *a) { 1294 return vld3_dup_p16(a); 1295 // [{{x[0-9]+|sp}}] 1296 } 1297 1298 // CHECK-LABEL: define %struct.poly64x1x3_t @test_vld3_dup_p64(i64* %a) #0 { 1299 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8 1300 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8 1301 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8* 1302 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 1303 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 1304 // CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* [[TMP2]]) 1305 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }* 1306 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] 1307 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x3_t* [[RETVAL]] to i8* 1308 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8* 1309 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false) 1310 // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[RETVAL]], align 8 1311 // CHECK: ret %struct.poly64x1x3_t [[TMP6]] 1312 poly64x1x3_t test_vld3_dup_p64(poly64_t *a) { 1313 return vld3_dup_p64(a); 1314 // [{{x[0-9]+|sp}}] 1315 } 1316 1317 // CHECK-LABEL: define %struct.uint8x16x4_t @test_vld4q_dup_u8(i8* %a) #0 { 1318 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16 1319 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16 1320 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* 1321 // CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %a) 1322 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* 1323 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] 1324 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8* 1325 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* 1326 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false) 1327 // CHECK: [[TMP4:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16 1328 // CHECK: ret %struct.uint8x16x4_t [[TMP4]] 1329 uint8x16x4_t test_vld4q_dup_u8(uint8_t *a) { 1330 return vld4q_dup_u8(a); 1331 } 1332 1333 // CHECK-LABEL: define %struct.uint16x8x4_t @test_vld4q_dup_u16(i16* %a) #0 { 1334 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16 1335 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16 1336 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* 1337 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 1338 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 1339 // CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* [[TMP2]]) 1340 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* 1341 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] 1342 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8* 1343 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* 1344 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 1345 // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16 1346 // CHECK: ret %struct.uint16x8x4_t [[TMP6]] 1347 uint16x8x4_t test_vld4q_dup_u16(uint16_t *a) { 1348 return vld4q_dup_u16(a); 1349 } 1350 1351 // CHECK-LABEL: define %struct.uint32x4x4_t @test_vld4q_dup_u32(i32* %a) #0 { 1352 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16 1353 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16 1354 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* 1355 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 1356 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* 1357 // CHECK: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* [[TMP2]]) 1358 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* 1359 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] 1360 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8* 1361 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* 1362 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 1363 // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16 1364 // CHECK: ret %struct.uint32x4x4_t [[TMP6]] 1365 uint32x4x4_t test_vld4q_dup_u32(uint32_t *a) { 1366 return vld4q_dup_u32(a); 1367 } 1368 1369 // CHECK-LABEL: define %struct.uint64x2x4_t @test_vld4q_dup_u64(i64* %a) #0 { 1370 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16 1371 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16 1372 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8* 1373 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 1374 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 1375 // CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* [[TMP2]]) 1376 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* 1377 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] 1378 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8* 1379 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8* 1380 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 1381 // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16 1382 // CHECK: ret %struct.uint64x2x4_t [[TMP6]] 1383 uint64x2x4_t test_vld4q_dup_u64(uint64_t *a) { 1384 return vld4q_dup_u64(a); 1385 } 1386 1387 // CHECK-LABEL: define %struct.int8x16x4_t @test_vld4q_dup_s8(i8* %a) #0 { 1388 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16 1389 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16 1390 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* 1391 // CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %a) 1392 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* 1393 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] 1394 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8* 1395 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* 1396 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false) 1397 // CHECK: [[TMP4:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16 1398 // CHECK: ret %struct.int8x16x4_t [[TMP4]] 1399 int8x16x4_t test_vld4q_dup_s8(int8_t *a) { 1400 return vld4q_dup_s8(a); 1401 } 1402 1403 // CHECK-LABEL: define %struct.int16x8x4_t @test_vld4q_dup_s16(i16* %a) #0 { 1404 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16 1405 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16 1406 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* 1407 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 1408 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 1409 // CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* [[TMP2]]) 1410 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* 1411 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] 1412 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8* 1413 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* 1414 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 1415 // CHECK: [[TMP6:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16 1416 // CHECK: ret %struct.int16x8x4_t [[TMP6]] 1417 int16x8x4_t test_vld4q_dup_s16(int16_t *a) { 1418 return vld4q_dup_s16(a); 1419 } 1420 1421 // CHECK-LABEL: define %struct.int32x4x4_t @test_vld4q_dup_s32(i32* %a) #0 { 1422 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16 1423 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16 1424 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* 1425 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 1426 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* 1427 // CHECK: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* [[TMP2]]) 1428 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* 1429 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP3]] 1430 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8* 1431 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* 1432 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 1433 // CHECK: [[TMP6:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16 1434 // CHECK: ret %struct.int32x4x4_t [[TMP6]] 1435 int32x4x4_t test_vld4q_dup_s32(int32_t *a) { 1436 return vld4q_dup_s32(a); 1437 } 1438 1439 // CHECK-LABEL: define %struct.int64x2x4_t @test_vld4q_dup_s64(i64* %a) #0 { 1440 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16 1441 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16 1442 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8* 1443 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 1444 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 1445 // CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* [[TMP2]]) 1446 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* 1447 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] 1448 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8* 1449 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8* 1450 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 1451 // CHECK: [[TMP6:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16 1452 // CHECK: ret %struct.int64x2x4_t [[TMP6]] 1453 int64x2x4_t test_vld4q_dup_s64(int64_t *a) { 1454 return vld4q_dup_s64(a); 1455 } 1456 1457 // CHECK-LABEL: define %struct.float16x8x4_t @test_vld4q_dup_f16(half* %a) #0 { 1458 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16 1459 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16 1460 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* 1461 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* 1462 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 1463 // CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* [[TMP2]]) 1464 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* 1465 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] 1466 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8* 1467 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* 1468 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 1469 // CHECK: [[TMP6:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16 1470 // CHECK: ret %struct.float16x8x4_t [[TMP6]] 1471 float16x8x4_t test_vld4q_dup_f16(float16_t *a) { 1472 return vld4q_dup_f16(a); 1473 } 1474 1475 // CHECK-LABEL: define %struct.float32x4x4_t @test_vld4q_dup_f32(float* %a) #0 { 1476 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16 1477 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16 1478 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* 1479 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* 1480 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* 1481 // CHECK: [[VLD4:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float* [[TMP2]]) 1482 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* 1483 // CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP3]] 1484 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8* 1485 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* 1486 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 1487 // CHECK: [[TMP6:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16 1488 // CHECK: ret %struct.float32x4x4_t [[TMP6]] 1489 float32x4x4_t test_vld4q_dup_f32(float32_t *a) { 1490 return vld4q_dup_f32(a); 1491 } 1492 1493 // CHECK-LABEL: define %struct.float64x2x4_t @test_vld4q_dup_f64(double* %a) #0 { 1494 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16 1495 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16 1496 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8* 1497 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8* 1498 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double* 1499 // CHECK: [[VLD4:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double* [[TMP2]]) 1500 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* 1501 // CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP3]] 1502 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8* 1503 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8* 1504 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 1505 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16 1506 // CHECK: ret %struct.float64x2x4_t [[TMP6]] 1507 float64x2x4_t test_vld4q_dup_f64(float64_t *a) { 1508 return vld4q_dup_f64(a); 1509 } 1510 1511 // CHECK-LABEL: define %struct.poly8x16x4_t @test_vld4q_dup_p8(i8* %a) #0 { 1512 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16 1513 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16 1514 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* 1515 // CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %a) 1516 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* 1517 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] 1518 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8* 1519 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* 1520 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 64, i32 16, i1 false) 1521 // CHECK: [[TMP4:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16 1522 // CHECK: ret %struct.poly8x16x4_t [[TMP4]] 1523 poly8x16x4_t test_vld4q_dup_p8(poly8_t *a) { 1524 return vld4q_dup_p8(a); 1525 } 1526 1527 // CHECK-LABEL: define %struct.poly16x8x4_t @test_vld4q_dup_p16(i16* %a) #0 { 1528 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16 1529 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16 1530 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* 1531 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 1532 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 1533 // CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* [[TMP2]]) 1534 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* 1535 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP3]] 1536 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8* 1537 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* 1538 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 1539 // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16 1540 // CHECK: ret %struct.poly16x8x4_t [[TMP6]] 1541 poly16x8x4_t test_vld4q_dup_p16(poly16_t *a) { 1542 return vld4q_dup_p16(a); 1543 } 1544 1545 // CHECK-LABEL: define %struct.poly64x2x4_t @test_vld4q_dup_p64(i64* %a) #0 { 1546 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16 1547 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16 1548 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8* 1549 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 1550 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 1551 // CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* [[TMP2]]) 1552 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* 1553 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]] 1554 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x4_t* [[RETVAL]] to i8* 1555 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8* 1556 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false) 1557 // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[RETVAL]], align 16 1558 // CHECK: ret %struct.poly64x2x4_t [[TMP6]] 1559 poly64x2x4_t test_vld4q_dup_p64(poly64_t *a) { 1560 return vld4q_dup_p64(a); 1561 } 1562 1563 // CHECK-LABEL: define %struct.uint8x8x4_t @test_vld4_dup_u8(i8* %a) #0 { 1564 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8 1565 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8 1566 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* 1567 // CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %a) 1568 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* 1569 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] 1570 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8* 1571 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* 1572 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false) 1573 // CHECK: [[TMP4:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8 1574 // CHECK: ret %struct.uint8x8x4_t [[TMP4]] 1575 uint8x8x4_t test_vld4_dup_u8(uint8_t *a) { 1576 return vld4_dup_u8(a); 1577 } 1578 1579 // CHECK-LABEL: define %struct.uint16x4x4_t @test_vld4_dup_u16(i16* %a) #0 { 1580 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8 1581 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8 1582 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* 1583 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 1584 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 1585 // CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* [[TMP2]]) 1586 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* 1587 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 1588 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8* 1589 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* 1590 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 1591 // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8 1592 // CHECK: ret %struct.uint16x4x4_t [[TMP6]] 1593 uint16x4x4_t test_vld4_dup_u16(uint16_t *a) { 1594 return vld4_dup_u16(a); 1595 } 1596 1597 // CHECK-LABEL: define %struct.uint32x2x4_t @test_vld4_dup_u32(i32* %a) #0 { 1598 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8 1599 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8 1600 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* 1601 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 1602 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* 1603 // CHECK: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* [[TMP2]]) 1604 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* 1605 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] 1606 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8* 1607 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* 1608 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 1609 // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8 1610 // CHECK: ret %struct.uint32x2x4_t [[TMP6]] 1611 uint32x2x4_t test_vld4_dup_u32(uint32_t *a) { 1612 return vld4_dup_u32(a); 1613 } 1614 1615 // CHECK-LABEL: define %struct.uint64x1x4_t @test_vld4_dup_u64(i64* %a) #0 { 1616 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8 1617 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8 1618 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* 1619 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 1620 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 1621 // CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* [[TMP2]]) 1622 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* 1623 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] 1624 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8* 1625 // CHECK: [[TMP5:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* 1626 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 1627 // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8 1628 // CHECK: ret %struct.uint64x1x4_t [[TMP6]] 1629 uint64x1x4_t test_vld4_dup_u64(uint64_t *a) { 1630 return vld4_dup_u64(a); 1631 } 1632 1633 // CHECK-LABEL: define %struct.int8x8x4_t @test_vld4_dup_s8(i8* %a) #0 { 1634 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8 1635 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8 1636 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* 1637 // CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %a) 1638 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* 1639 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] 1640 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8* 1641 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* 1642 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false) 1643 // CHECK: [[TMP4:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8 1644 // CHECK: ret %struct.int8x8x4_t [[TMP4]] 1645 int8x8x4_t test_vld4_dup_s8(int8_t *a) { 1646 return vld4_dup_s8(a); 1647 } 1648 1649 // CHECK-LABEL: define %struct.int16x4x4_t @test_vld4_dup_s16(i16* %a) #0 { 1650 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8 1651 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8 1652 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* 1653 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 1654 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 1655 // CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* [[TMP2]]) 1656 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* 1657 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 1658 // CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8* 1659 // CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* 1660 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 1661 // CHECK: [[TMP6:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8 1662 // CHECK: ret %struct.int16x4x4_t [[TMP6]] 1663 int16x4x4_t test_vld4_dup_s16(int16_t *a) { 1664 return vld4_dup_s16(a); 1665 } 1666 1667 // CHECK-LABEL: define %struct.int32x2x4_t @test_vld4_dup_s32(i32* %a) #0 { 1668 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8 1669 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8 1670 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* 1671 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 1672 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* 1673 // CHECK: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* [[TMP2]]) 1674 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* 1675 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP3]] 1676 // CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8* 1677 // CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* 1678 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 1679 // CHECK: [[TMP6:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8 1680 // CHECK: ret %struct.int32x2x4_t [[TMP6]] 1681 int32x2x4_t test_vld4_dup_s32(int32_t *a) { 1682 return vld4_dup_s32(a); 1683 } 1684 1685 // CHECK-LABEL: define %struct.int64x1x4_t @test_vld4_dup_s64(i64* %a) #0 { 1686 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8 1687 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8 1688 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* 1689 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 1690 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 1691 // CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* [[TMP2]]) 1692 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* 1693 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] 1694 // CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8* 1695 // CHECK: [[TMP5:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* 1696 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 1697 // CHECK: [[TMP6:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8 1698 // CHECK: ret %struct.int64x1x4_t [[TMP6]] 1699 int64x1x4_t test_vld4_dup_s64(int64_t *a) { 1700 return vld4_dup_s64(a); 1701 } 1702 1703 // CHECK-LABEL: define %struct.float16x4x4_t @test_vld4_dup_f16(half* %a) #0 { 1704 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8 1705 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8 1706 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* 1707 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* 1708 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 1709 // CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* [[TMP2]]) 1710 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* 1711 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 1712 // CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8* 1713 // CHECK: [[TMP5:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* 1714 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 1715 // CHECK: [[TMP6:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8 1716 // CHECK: ret %struct.float16x4x4_t [[TMP6]] 1717 float16x4x4_t test_vld4_dup_f16(float16_t *a) { 1718 return vld4_dup_f16(a); 1719 } 1720 1721 // CHECK-LABEL: define %struct.float32x2x4_t @test_vld4_dup_f32(float* %a) #0 { 1722 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8 1723 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8 1724 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* 1725 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* 1726 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to float* 1727 // CHECK: [[VLD4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* [[TMP2]]) 1728 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* 1729 // CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP3]] 1730 // CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8* 1731 // CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* 1732 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 1733 // CHECK: [[TMP6:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8 1734 // CHECK: ret %struct.float32x2x4_t [[TMP6]] 1735 float32x2x4_t test_vld4_dup_f32(float32_t *a) { 1736 return vld4_dup_f32(a); 1737 } 1738 1739 // CHECK-LABEL: define %struct.float64x1x4_t @test_vld4_dup_f64(double* %a) #0 { 1740 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8 1741 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8 1742 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8* 1743 // CHECK: [[TMP1:%.*]] = bitcast double* %a to i8* 1744 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double* 1745 // CHECK: [[VLD4:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double* [[TMP2]]) 1746 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* 1747 // CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP3]] 1748 // CHECK: [[TMP4:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8* 1749 // CHECK: [[TMP5:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8* 1750 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 1751 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8 1752 // CHECK: ret %struct.float64x1x4_t [[TMP6]] 1753 float64x1x4_t test_vld4_dup_f64(float64_t *a) { 1754 return vld4_dup_f64(a); 1755 } 1756 1757 // CHECK-LABEL: define %struct.poly8x8x4_t @test_vld4_dup_p8(i8* %a) #0 { 1758 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8 1759 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8 1760 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* 1761 // CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %a) 1762 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* 1763 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] 1764 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8* 1765 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* 1766 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 32, i32 8, i1 false) 1767 // CHECK: [[TMP4:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8 1768 // CHECK: ret %struct.poly8x8x4_t [[TMP4]] 1769 poly8x8x4_t test_vld4_dup_p8(poly8_t *a) { 1770 return vld4_dup_p8(a); 1771 } 1772 1773 // CHECK-LABEL: define %struct.poly16x4x4_t @test_vld4_dup_p16(i16* %a) #0 { 1774 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8 1775 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8 1776 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* 1777 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 1778 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i16* 1779 // CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* [[TMP2]]) 1780 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* 1781 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP3]] 1782 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8* 1783 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* 1784 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 1785 // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8 1786 // CHECK: ret %struct.poly16x4x4_t [[TMP6]] 1787 poly16x4x4_t test_vld4_dup_p16(poly16_t *a) { 1788 return vld4_dup_p16(a); 1789 } 1790 1791 // CHECK-LABEL: define %struct.poly64x1x4_t @test_vld4_dup_p64(i64* %a) #0 { 1792 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8 1793 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8 1794 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8* 1795 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 1796 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* 1797 // CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* [[TMP2]]) 1798 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* 1799 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]] 1800 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x4_t* [[RETVAL]] to i8* 1801 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8* 1802 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false) 1803 // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[RETVAL]], align 8 1804 // CHECK: ret %struct.poly64x1x4_t [[TMP6]] 1805 poly64x1x4_t test_vld4_dup_p64(poly64_t *a) { 1806 return vld4_dup_p64(a); 1807 } 1808 1809 // CHECK-LABEL: define <16 x i8> @test_vld1q_lane_u8(i8* %a, <16 x i8> %b) #0 { 1810 // CHECK: [[TMP0:%.*]] = load i8, i8* %a 1811 // CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15 1812 // CHECK: ret <16 x i8> [[VLD1_LANE]] 1813 uint8x16_t test_vld1q_lane_u8(uint8_t *a, uint8x16_t b) { 1814 return vld1q_lane_u8(a, b, 15); 1815 } 1816 1817 // CHECK-LABEL: define <8 x i16> @test_vld1q_lane_u16(i16* %a, <8 x i16> %b) #0 { 1818 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 1819 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 1820 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 1821 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16* 1822 // CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]] 1823 // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7 1824 // CHECK: ret <8 x i16> [[VLD1_LANE]] 1825 uint16x8_t test_vld1q_lane_u16(uint16_t *a, uint16x8_t b) { 1826 return vld1q_lane_u16(a, b, 7); 1827 } 1828 1829 // CHECK-LABEL: define <4 x i32> @test_vld1q_lane_u32(i32* %a, <4 x i32> %b) #0 { 1830 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* 1831 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 1832 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 1833 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32* 1834 // CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]] 1835 // CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i32 3 1836 // CHECK: ret <4 x i32> [[VLD1_LANE]] 1837 uint32x4_t test_vld1q_lane_u32(uint32_t *a, uint32x4_t b) { 1838 return vld1q_lane_u32(a, b, 3); 1839 } 1840 1841 // CHECK-LABEL: define <2 x i64> @test_vld1q_lane_u64(i64* %a, <2 x i64> %b) #0 { 1842 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 1843 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 1844 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 1845 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i64* 1846 // CHECK: [[TMP4:%.*]] = load i64, i64* [[TMP3]] 1847 // CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[TMP4]], i32 1 1848 // CHECK: ret <2 x i64> [[VLD1_LANE]] 1849 uint64x2_t test_vld1q_lane_u64(uint64_t *a, uint64x2_t b) { 1850 return vld1q_lane_u64(a, b, 1); 1851 } 1852 1853 // CHECK-LABEL: define <16 x i8> @test_vld1q_lane_s8(i8* %a, <16 x i8> %b) #0 { 1854 // CHECK: [[TMP0:%.*]] = load i8, i8* %a 1855 // CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15 1856 // CHECK: ret <16 x i8> [[VLD1_LANE]] 1857 int8x16_t test_vld1q_lane_s8(int8_t *a, int8x16_t b) { 1858 return vld1q_lane_s8(a, b, 15); 1859 } 1860 1861 // CHECK-LABEL: define <8 x i16> @test_vld1q_lane_s16(i16* %a, <8 x i16> %b) #0 { 1862 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 1863 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 1864 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 1865 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16* 1866 // CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]] 1867 // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7 1868 // CHECK: ret <8 x i16> [[VLD1_LANE]] 1869 int16x8_t test_vld1q_lane_s16(int16_t *a, int16x8_t b) { 1870 return vld1q_lane_s16(a, b, 7); 1871 } 1872 1873 // CHECK-LABEL: define <4 x i32> @test_vld1q_lane_s32(i32* %a, <4 x i32> %b) #0 { 1874 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* 1875 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 1876 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 1877 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32* 1878 // CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]] 1879 // CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i32 3 1880 // CHECK: ret <4 x i32> [[VLD1_LANE]] 1881 int32x4_t test_vld1q_lane_s32(int32_t *a, int32x4_t b) { 1882 return vld1q_lane_s32(a, b, 3); 1883 } 1884 1885 // CHECK-LABEL: define <2 x i64> @test_vld1q_lane_s64(i64* %a, <2 x i64> %b) #0 { 1886 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 1887 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 1888 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 1889 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i64* 1890 // CHECK: [[TMP4:%.*]] = load i64, i64* [[TMP3]] 1891 // CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[TMP4]], i32 1 1892 // CHECK: ret <2 x i64> [[VLD1_LANE]] 1893 int64x2_t test_vld1q_lane_s64(int64_t *a, int64x2_t b) { 1894 return vld1q_lane_s64(a, b, 1); 1895 } 1896 1897 // CHECK-LABEL: define <8 x half> @test_vld1q_lane_f16(half* %a, <8 x half> %b) #0 { 1898 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* 1899 // CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8> 1900 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 1901 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16* 1902 // CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]] 1903 // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7 1904 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[VLD1_LANE]] to <8 x half> 1905 // CHECK: ret <8 x half> [[TMP5]] 1906 float16x8_t test_vld1q_lane_f16(float16_t *a, float16x8_t b) { 1907 return vld1q_lane_f16(a, b, 7); 1908 } 1909 1910 // CHECK-LABEL: define <4 x float> @test_vld1q_lane_f32(float* %a, <4 x float> %b) #0 { 1911 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* 1912 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 1913 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 1914 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to float* 1915 // CHECK: [[TMP4:%.*]] = load float, float* [[TMP3]] 1916 // CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP4]], i32 3 1917 // CHECK: ret <4 x float> [[VLD1_LANE]] 1918 float32x4_t test_vld1q_lane_f32(float32_t *a, float32x4_t b) { 1919 return vld1q_lane_f32(a, b, 3); 1920 } 1921 1922 // CHECK-LABEL: define <2 x double> @test_vld1q_lane_f64(double* %a, <2 x double> %b) #0 { 1923 // CHECK: [[TMP0:%.*]] = bitcast double* %a to i8* 1924 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 1925 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 1926 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to double* 1927 // CHECK: [[TMP4:%.*]] = load double, double* [[TMP3]] 1928 // CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP4]], i32 1 1929 // CHECK: ret <2 x double> [[VLD1_LANE]] 1930 float64x2_t test_vld1q_lane_f64(float64_t *a, float64x2_t b) { 1931 return vld1q_lane_f64(a, b, 1); 1932 } 1933 1934 // CHECK-LABEL: define <16 x i8> @test_vld1q_lane_p8(i8* %a, <16 x i8> %b) #0 { 1935 // CHECK: [[TMP0:%.*]] = load i8, i8* %a 1936 // CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15 1937 // CHECK: ret <16 x i8> [[VLD1_LANE]] 1938 poly8x16_t test_vld1q_lane_p8(poly8_t *a, poly8x16_t b) { 1939 return vld1q_lane_p8(a, b, 15); 1940 } 1941 1942 // CHECK-LABEL: define <8 x i16> @test_vld1q_lane_p16(i16* %a, <8 x i16> %b) #0 { 1943 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 1944 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 1945 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 1946 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16* 1947 // CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]] 1948 // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7 1949 // CHECK: ret <8 x i16> [[VLD1_LANE]] 1950 poly16x8_t test_vld1q_lane_p16(poly16_t *a, poly16x8_t b) { 1951 return vld1q_lane_p16(a, b, 7); 1952 } 1953 1954 // CHECK-LABEL: define <2 x i64> @test_vld1q_lane_p64(i64* %a, <2 x i64> %b) #0 { 1955 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 1956 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 1957 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 1958 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i64* 1959 // CHECK: [[TMP4:%.*]] = load i64, i64* [[TMP3]] 1960 // CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[TMP4]], i32 1 1961 // CHECK: ret <2 x i64> [[VLD1_LANE]] 1962 poly64x2_t test_vld1q_lane_p64(poly64_t *a, poly64x2_t b) { 1963 return vld1q_lane_p64(a, b, 1); 1964 } 1965 1966 // CHECK-LABEL: define <8 x i8> @test_vld1_lane_u8(i8* %a, <8 x i8> %b) #0 { 1967 // CHECK: [[TMP0:%.*]] = load i8, i8* %a 1968 // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7 1969 // CHECK: ret <8 x i8> [[VLD1_LANE]] 1970 uint8x8_t test_vld1_lane_u8(uint8_t *a, uint8x8_t b) { 1971 return vld1_lane_u8(a, b, 7); 1972 } 1973 1974 // CHECK-LABEL: define <4 x i16> @test_vld1_lane_u16(i16* %a, <4 x i16> %b) #0 { 1975 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 1976 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 1977 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 1978 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16* 1979 // CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]] 1980 // CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3 1981 // CHECK: ret <4 x i16> [[VLD1_LANE]] 1982 uint16x4_t test_vld1_lane_u16(uint16_t *a, uint16x4_t b) { 1983 return vld1_lane_u16(a, b, 3); 1984 } 1985 1986 // CHECK-LABEL: define <2 x i32> @test_vld1_lane_u32(i32* %a, <2 x i32> %b) #0 { 1987 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* 1988 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 1989 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 1990 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32* 1991 // CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]] 1992 // CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP4]], i32 1 1993 // CHECK: ret <2 x i32> [[VLD1_LANE]] 1994 uint32x2_t test_vld1_lane_u32(uint32_t *a, uint32x2_t b) { 1995 return vld1_lane_u32(a, b, 1); 1996 } 1997 1998 // CHECK-LABEL: define <1 x i64> @test_vld1_lane_u64(i64* %a, <1 x i64> %b) #0 { 1999 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 2000 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 2001 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 2002 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i64* 2003 // CHECK: [[TMP4:%.*]] = load i64, i64* [[TMP3]] 2004 // CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0 2005 // CHECK: ret <1 x i64> [[VLD1_LANE]] 2006 uint64x1_t test_vld1_lane_u64(uint64_t *a, uint64x1_t b) { 2007 return vld1_lane_u64(a, b, 0); 2008 } 2009 2010 // CHECK-LABEL: define <8 x i8> @test_vld1_lane_s8(i8* %a, <8 x i8> %b) #0 { 2011 // CHECK: [[TMP0:%.*]] = load i8, i8* %a 2012 // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7 2013 // CHECK: ret <8 x i8> [[VLD1_LANE]] 2014 int8x8_t test_vld1_lane_s8(int8_t *a, int8x8_t b) { 2015 return vld1_lane_s8(a, b, 7); 2016 } 2017 2018 // CHECK-LABEL: define <4 x i16> @test_vld1_lane_s16(i16* %a, <4 x i16> %b) #0 { 2019 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 2020 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 2021 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2022 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16* 2023 // CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]] 2024 // CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3 2025 // CHECK: ret <4 x i16> [[VLD1_LANE]] 2026 int16x4_t test_vld1_lane_s16(int16_t *a, int16x4_t b) { 2027 return vld1_lane_s16(a, b, 3); 2028 } 2029 2030 // CHECK-LABEL: define <2 x i32> @test_vld1_lane_s32(i32* %a, <2 x i32> %b) #0 { 2031 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* 2032 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 2033 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 2034 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32* 2035 // CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]] 2036 // CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP4]], i32 1 2037 // CHECK: ret <2 x i32> [[VLD1_LANE]] 2038 int32x2_t test_vld1_lane_s32(int32_t *a, int32x2_t b) { 2039 return vld1_lane_s32(a, b, 1); 2040 } 2041 2042 // CHECK-LABEL: define <1 x i64> @test_vld1_lane_s64(i64* %a, <1 x i64> %b) #0 { 2043 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 2044 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 2045 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 2046 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i64* 2047 // CHECK: [[TMP4:%.*]] = load i64, i64* [[TMP3]] 2048 // CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0 2049 // CHECK: ret <1 x i64> [[VLD1_LANE]] 2050 int64x1_t test_vld1_lane_s64(int64_t *a, int64x1_t b) { 2051 return vld1_lane_s64(a, b, 0); 2052 } 2053 2054 // CHECK-LABEL: define <4 x half> @test_vld1_lane_f16(half* %a, <4 x half> %b) #0 { 2055 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* 2056 // CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8> 2057 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2058 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16* 2059 // CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]] 2060 // CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3 2061 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[VLD1_LANE]] to <4 x half> 2062 // CHECK: ret <4 x half> [[TMP5]] 2063 float16x4_t test_vld1_lane_f16(float16_t *a, float16x4_t b) { 2064 return vld1_lane_f16(a, b, 3); 2065 } 2066 2067 // CHECK-LABEL: define <2 x float> @test_vld1_lane_f32(float* %a, <2 x float> %b) #0 { 2068 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* 2069 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 2070 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 2071 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to float* 2072 // CHECK: [[TMP4:%.*]] = load float, float* [[TMP3]] 2073 // CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP4]], i32 1 2074 // CHECK: ret <2 x float> [[VLD1_LANE]] 2075 float32x2_t test_vld1_lane_f32(float32_t *a, float32x2_t b) { 2076 return vld1_lane_f32(a, b, 1); 2077 } 2078 2079 // CHECK-LABEL: define <1 x double> @test_vld1_lane_f64(double* %a, <1 x double> %b) #0 { 2080 // CHECK: [[TMP0:%.*]] = bitcast double* %a to i8* 2081 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> 2082 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> 2083 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to double* 2084 // CHECK: [[TMP4:%.*]] = load double, double* [[TMP3]] 2085 // CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x double> [[TMP2]], double [[TMP4]], i32 0 2086 // CHECK: ret <1 x double> [[VLD1_LANE]] 2087 float64x1_t test_vld1_lane_f64(float64_t *a, float64x1_t b) { 2088 return vld1_lane_f64(a, b, 0); 2089 } 2090 2091 // CHECK-LABEL: define <8 x i8> @test_vld1_lane_p8(i8* %a, <8 x i8> %b) #0 { 2092 // CHECK: [[TMP0:%.*]] = load i8, i8* %a 2093 // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7 2094 // CHECK: ret <8 x i8> [[VLD1_LANE]] 2095 poly8x8_t test_vld1_lane_p8(poly8_t *a, poly8x8_t b) { 2096 return vld1_lane_p8(a, b, 7); 2097 } 2098 2099 // CHECK-LABEL: define <4 x i16> @test_vld1_lane_p16(i16* %a, <4 x i16> %b) #0 { 2100 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 2101 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 2102 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 2103 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16* 2104 // CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]] 2105 // CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3 2106 // CHECK: ret <4 x i16> [[VLD1_LANE]] 2107 poly16x4_t test_vld1_lane_p16(poly16_t *a, poly16x4_t b) { 2108 return vld1_lane_p16(a, b, 3); 2109 } 2110 2111 // CHECK-LABEL: define <1 x i64> @test_vld1_lane_p64(i64* %a, <1 x i64> %b) #0 { 2112 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 2113 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 2114 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 2115 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i64* 2116 // CHECK: [[TMP4:%.*]] = load i64, i64* [[TMP3]] 2117 // CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0 2118 // CHECK: ret <1 x i64> [[VLD1_LANE]] 2119 poly64x1_t test_vld1_lane_p64(poly64_t *a, poly64x1_t b) { 2120 return vld1_lane_p64(a, b, 0); 2121 } 2122 2123 // CHECK-LABEL: define %struct.int8x16x2_t @test_vld2q_lane_s8(i8* %ptr, [2 x <16 x i8>] %src.coerce) #0 { 2124 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16 2125 // CHECK: [[SRC:%.*]] = alloca %struct.int8x16x2_t, align 16 2126 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16 2127 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16 2128 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[SRC]], i32 0, i32 0 2129 // CHECK: store [2 x <16 x i8>] [[SRC]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 2130 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8* 2131 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[SRC]] to i8* 2132 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 2133 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8* 2134 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0 2135 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 2136 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 2137 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0 2138 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1 2139 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 2140 // CHECK: [[VLD2_LANE:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i64 15, i8* %ptr) 2141 // CHECK: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to { <16 x i8>, <16 x i8> }* 2142 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2_LANE]], { <16 x i8>, <16 x i8> }* [[TMP5]] 2143 // CHECK: [[TMP6:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL]] to i8* 2144 // CHECK: [[TMP7:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8* 2145 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP6]], i8* [[TMP7]], i64 32, i32 16, i1 false) 2146 // CHECK: [[TMP8:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16 2147 // CHECK: ret %struct.int8x16x2_t [[TMP8]] 2148 int8x16x2_t test_vld2q_lane_s8(int8_t const * ptr, int8x16x2_t src) { 2149 return vld2q_lane_s8(ptr, src, 15); 2150 } 2151 2152 // CHECK-LABEL: define %struct.uint8x16x2_t @test_vld2q_lane_u8(i8* %ptr, [2 x <16 x i8>] %src.coerce) #0 { 2153 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16 2154 // CHECK: [[SRC:%.*]] = alloca %struct.uint8x16x2_t, align 16 2155 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16 2156 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16 2157 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[SRC]], i32 0, i32 0 2158 // CHECK: store [2 x <16 x i8>] [[SRC]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 2159 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8* 2160 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[SRC]] to i8* 2161 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 2162 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8* 2163 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0 2164 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 2165 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 2166 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0 2167 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1 2168 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 2169 // CHECK: [[VLD2_LANE:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i64 15, i8* %ptr) 2170 // CHECK: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to { <16 x i8>, <16 x i8> }* 2171 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2_LANE]], { <16 x i8>, <16 x i8> }* [[TMP5]] 2172 // CHECK: [[TMP6:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL]] to i8* 2173 // CHECK: [[TMP7:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8* 2174 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP6]], i8* [[TMP7]], i64 32, i32 16, i1 false) 2175 // CHECK: [[TMP8:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16 2176 // CHECK: ret %struct.uint8x16x2_t [[TMP8]] 2177 uint8x16x2_t test_vld2q_lane_u8(uint8_t const * ptr, uint8x16x2_t src) { 2178 return vld2q_lane_u8(ptr, src, 15); 2179 } 2180 2181 // CHECK-LABEL: define %struct.poly8x16x2_t @test_vld2q_lane_p8(i8* %ptr, [2 x <16 x i8>] %src.coerce) #0 { 2182 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16 2183 // CHECK: [[SRC:%.*]] = alloca %struct.poly8x16x2_t, align 16 2184 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16 2185 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16 2186 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[SRC]], i32 0, i32 0 2187 // CHECK: store [2 x <16 x i8>] [[SRC]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 2188 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8* 2189 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[SRC]] to i8* 2190 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 2191 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8* 2192 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0 2193 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 2194 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 2195 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0 2196 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1 2197 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 2198 // CHECK: [[VLD2_LANE:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i64 15, i8* %ptr) 2199 // CHECK: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to { <16 x i8>, <16 x i8> }* 2200 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2_LANE]], { <16 x i8>, <16 x i8> }* [[TMP5]] 2201 // CHECK: [[TMP6:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL]] to i8* 2202 // CHECK: [[TMP7:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8* 2203 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP6]], i8* [[TMP7]], i64 32, i32 16, i1 false) 2204 // CHECK: [[TMP8:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16 2205 // CHECK: ret %struct.poly8x16x2_t [[TMP8]] 2206 poly8x16x2_t test_vld2q_lane_p8(poly8_t const * ptr, poly8x16x2_t src) { 2207 return vld2q_lane_p8(ptr, src, 15); 2208 } 2209 2210 // CHECK-LABEL: define %struct.int8x16x3_t @test_vld3q_lane_s8(i8* %ptr, [3 x <16 x i8>] %src.coerce) #0 { 2211 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16 2212 // CHECK: [[SRC:%.*]] = alloca %struct.int8x16x3_t, align 16 2213 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16 2214 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16 2215 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[SRC]], i32 0, i32 0 2216 // CHECK: store [3 x <16 x i8>] [[SRC]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 2217 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8* 2218 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[SRC]] to i8* 2219 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 2220 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8* 2221 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 2222 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 2223 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 2224 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 2225 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1 2226 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 2227 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 2228 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2 2229 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 2230 // CHECK: [[VLD3_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i64 15, i8* %ptr) 2231 // CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to { <16 x i8>, <16 x i8>, <16 x i8> }* 2232 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP6]] 2233 // CHECK: [[TMP7:%.*]] = bitcast %struct.int8x16x3_t* [[RETVAL]] to i8* 2234 // CHECK: [[TMP8:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8* 2235 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 48, i32 16, i1 false) 2236 // CHECK: [[TMP9:%.*]] = load %struct.int8x16x3_t, %struct.int8x16x3_t* [[RETVAL]], align 16 2237 // CHECK: ret %struct.int8x16x3_t [[TMP9]] 2238 int8x16x3_t test_vld3q_lane_s8(int8_t const * ptr, int8x16x3_t src) { 2239 return vld3q_lane_s8(ptr, src, 15); 2240 } 2241 2242 // CHECK-LABEL: define %struct.uint8x16x3_t @test_vld3q_lane_u8(i8* %ptr, [3 x <16 x i8>] %src.coerce) #0 { 2243 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16 2244 // CHECK: [[SRC:%.*]] = alloca %struct.uint8x16x3_t, align 16 2245 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16 2246 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16 2247 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[SRC]], i32 0, i32 0 2248 // CHECK: store [3 x <16 x i8>] [[SRC]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 2249 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8* 2250 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[SRC]] to i8* 2251 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 2252 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8* 2253 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 2254 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 2255 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 2256 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 2257 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1 2258 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 2259 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 2260 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2 2261 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 2262 // CHECK: [[VLD3_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i64 15, i8* %ptr) 2263 // CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to { <16 x i8>, <16 x i8>, <16 x i8> }* 2264 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP6]] 2265 // CHECK: [[TMP7:%.*]] = bitcast %struct.uint8x16x3_t* [[RETVAL]] to i8* 2266 // CHECK: [[TMP8:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8* 2267 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 48, i32 16, i1 false) 2268 // CHECK: [[TMP9:%.*]] = load %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[RETVAL]], align 16 2269 // CHECK: ret %struct.uint8x16x3_t [[TMP9]] 2270 uint8x16x3_t test_vld3q_lane_u8(uint8_t const * ptr, uint8x16x3_t src) { 2271 return vld3q_lane_u8(ptr, src, 15); 2272 } 2273 2274 // CHECK-LABEL: define %struct.uint16x8x2_t @test_vld2q_lane_u16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 { 2275 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16 2276 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16 2277 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16 2278 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16 2279 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0 2280 // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 2281 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8* 2282 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8* 2283 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 2284 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* 2285 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 2286 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 2287 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0 2288 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 2289 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 2290 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 2291 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1 2292 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 2293 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 2294 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 2295 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 2296 // CHECK: [[VLD2_LANE:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i8(<8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i64 7, i8* [[TMP3]]) 2297 // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16> }* 2298 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], { <8 x i16>, <8 x i16> }* [[TMP10]] 2299 // CHECK: [[TMP11:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8* 2300 // CHECK: [[TMP12:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* 2301 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) 2302 // CHECK: [[TMP13:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16 2303 // CHECK: ret %struct.uint16x8x2_t [[TMP13]] 2304 uint16x8x2_t test_vld2q_lane_u16(uint16_t *a, uint16x8x2_t b) { 2305 return vld2q_lane_u16(a, b, 7); 2306 } 2307 2308 // CHECK-LABEL: define %struct.uint32x4x2_t @test_vld2q_lane_u32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 { 2309 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16 2310 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16 2311 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16 2312 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16 2313 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0 2314 // CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16 2315 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8* 2316 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8* 2317 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 2318 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* 2319 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 2320 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 2321 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0 2322 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 2323 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> 2324 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 2325 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1 2326 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 2327 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> 2328 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> 2329 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> 2330 // CHECK: [[VLD2_LANE:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i8(<4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i64 3, i8* [[TMP3]]) 2331 // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <4 x i32>, <4 x i32> }* 2332 // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2_LANE]], { <4 x i32>, <4 x i32> }* [[TMP10]] 2333 // CHECK: [[TMP11:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8* 2334 // CHECK: [[TMP12:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* 2335 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) 2336 // CHECK: [[TMP13:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16 2337 // CHECK: ret %struct.uint32x4x2_t [[TMP13]] 2338 uint32x4x2_t test_vld2q_lane_u32(uint32_t *a, uint32x4x2_t b) { 2339 return vld2q_lane_u32(a, b, 3); 2340 } 2341 2342 // CHECK-LABEL: define %struct.uint64x2x2_t @test_vld2q_lane_u64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 { 2343 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16 2344 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16 2345 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16 2346 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16 2347 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[B]], i32 0, i32 0 2348 // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 2349 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8* 2350 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8* 2351 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 2352 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8* 2353 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* 2354 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0 2355 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0 2356 // CHECK: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 2357 // CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8> 2358 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0 2359 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1 2360 // CHECK: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 2361 // CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8> 2362 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64> 2363 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64> 2364 // CHECK: [[VLD2_LANE:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i8(<2 x i64> [[TMP8]], <2 x i64> [[TMP9]], i64 1, i8* [[TMP3]]) 2365 // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <2 x i64>, <2 x i64> }* 2366 // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2_LANE]], { <2 x i64>, <2 x i64> }* [[TMP10]] 2367 // CHECK: [[TMP11:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8* 2368 // CHECK: [[TMP12:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8* 2369 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) 2370 // CHECK: [[TMP13:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16 2371 // CHECK: ret %struct.uint64x2x2_t [[TMP13]] 2372 uint64x2x2_t test_vld2q_lane_u64(uint64_t *a, uint64x2x2_t b) { 2373 return vld2q_lane_u64(a, b, 1); 2374 } 2375 2376 // CHECK-LABEL: define %struct.int16x8x2_t @test_vld2q_lane_s16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 { 2377 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16 2378 // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16 2379 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16 2380 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16 2381 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0 2382 // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 2383 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8* 2384 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8* 2385 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 2386 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* 2387 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 2388 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 2389 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0 2390 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 2391 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 2392 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 2393 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1 2394 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 2395 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 2396 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 2397 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 2398 // CHECK: [[VLD2_LANE:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i8(<8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i64 7, i8* [[TMP3]]) 2399 // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16> }* 2400 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], { <8 x i16>, <8 x i16> }* [[TMP10]] 2401 // CHECK: [[TMP11:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8* 2402 // CHECK: [[TMP12:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* 2403 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) 2404 // CHECK: [[TMP13:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16 2405 // CHECK: ret %struct.int16x8x2_t [[TMP13]] 2406 int16x8x2_t test_vld2q_lane_s16(int16_t *a, int16x8x2_t b) { 2407 return vld2q_lane_s16(a, b, 7); 2408 } 2409 2410 // CHECK-LABEL: define %struct.int32x4x2_t @test_vld2q_lane_s32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 { 2411 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16 2412 // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16 2413 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16 2414 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16 2415 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0 2416 // CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16 2417 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8* 2418 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8* 2419 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 2420 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8* 2421 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 2422 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 2423 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0 2424 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 2425 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> 2426 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 2427 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1 2428 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 2429 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> 2430 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> 2431 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> 2432 // CHECK: [[VLD2_LANE:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i8(<4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i64 3, i8* [[TMP3]]) 2433 // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <4 x i32>, <4 x i32> }* 2434 // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2_LANE]], { <4 x i32>, <4 x i32> }* [[TMP10]] 2435 // CHECK: [[TMP11:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8* 2436 // CHECK: [[TMP12:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8* 2437 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) 2438 // CHECK: [[TMP13:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16 2439 // CHECK: ret %struct.int32x4x2_t [[TMP13]] 2440 int32x4x2_t test_vld2q_lane_s32(int32_t *a, int32x4x2_t b) { 2441 return vld2q_lane_s32(a, b, 3); 2442 } 2443 2444 // CHECK-LABEL: define %struct.int64x2x2_t @test_vld2q_lane_s64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 { 2445 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16 2446 // CHECK: [[B:%.*]] = alloca %struct.int64x2x2_t, align 16 2447 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16 2448 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16 2449 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[B]], i32 0, i32 0 2450 // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 2451 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8* 2452 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8* 2453 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 2454 // CHECK: [[TMP2:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8* 2455 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* 2456 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0 2457 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0 2458 // CHECK: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 2459 // CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8> 2460 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0 2461 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1 2462 // CHECK: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 2463 // CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8> 2464 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64> 2465 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64> 2466 // CHECK: [[VLD2_LANE:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i8(<2 x i64> [[TMP8]], <2 x i64> [[TMP9]], i64 1, i8* [[TMP3]]) 2467 // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <2 x i64>, <2 x i64> }* 2468 // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2_LANE]], { <2 x i64>, <2 x i64> }* [[TMP10]] 2469 // CHECK: [[TMP11:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8* 2470 // CHECK: [[TMP12:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8* 2471 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) 2472 // CHECK: [[TMP13:%.*]] = load %struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16 2473 // CHECK: ret %struct.int64x2x2_t [[TMP13]] 2474 int64x2x2_t test_vld2q_lane_s64(int64_t *a, int64x2x2_t b) { 2475 return vld2q_lane_s64(a, b, 1); 2476 } 2477 2478 // CHECK-LABEL: define %struct.float16x8x2_t @test_vld2q_lane_f16(half* %a, [2 x <8 x half>] %b.coerce) #0 { 2479 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16 2480 // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16 2481 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16 2482 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16 2483 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0 2484 // CHECK: store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16 2485 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8* 2486 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8* 2487 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 2488 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8* 2489 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* 2490 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 2491 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i64 0, i64 0 2492 // CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 2493 // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> 2494 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 2495 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1 2496 // CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 2497 // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8> 2498 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 2499 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 2500 // CHECK: [[VLD2_LANE:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i8(<8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i64 7, i8* [[TMP3]]) 2501 // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16> }* 2502 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], { <8 x i16>, <8 x i16> }* [[TMP10]] 2503 // CHECK: [[TMP11:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8* 2504 // CHECK: [[TMP12:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8* 2505 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) 2506 // CHECK: [[TMP13:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16 2507 // CHECK: ret %struct.float16x8x2_t [[TMP13]] 2508 float16x8x2_t test_vld2q_lane_f16(float16_t *a, float16x8x2_t b) { 2509 return vld2q_lane_f16(a, b, 7); 2510 } 2511 2512 // CHECK-LABEL: define %struct.float32x4x2_t @test_vld2q_lane_f32(float* %a, [2 x <4 x float>] %b.coerce) #0 { 2513 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16 2514 // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16 2515 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16 2516 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16 2517 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0 2518 // CHECK: store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16 2519 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8* 2520 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8* 2521 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 2522 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8* 2523 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* 2524 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 2525 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i64 0, i64 0 2526 // CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 2527 // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8> 2528 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 2529 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i64 0, i64 1 2530 // CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 2531 // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8> 2532 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float> 2533 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float> 2534 // CHECK: [[VLD2_LANE:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0i8(<4 x float> [[TMP8]], <4 x float> [[TMP9]], i64 3, i8* [[TMP3]]) 2535 // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <4 x float>, <4 x float> }* 2536 // CHECK: store { <4 x float>, <4 x float> } [[VLD2_LANE]], { <4 x float>, <4 x float> }* [[TMP10]] 2537 // CHECK: [[TMP11:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8* 2538 // CHECK: [[TMP12:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8* 2539 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) 2540 // CHECK: [[TMP13:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16 2541 // CHECK: ret %struct.float32x4x2_t [[TMP13]] 2542 float32x4x2_t test_vld2q_lane_f32(float32_t *a, float32x4x2_t b) { 2543 return vld2q_lane_f32(a, b, 3); 2544 } 2545 2546 // CHECK-LABEL: define %struct.float64x2x2_t @test_vld2q_lane_f64(double* %a, [2 x <2 x double>] %b.coerce) #0 { 2547 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16 2548 // CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16 2549 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16 2550 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16 2551 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0 2552 // CHECK: store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16 2553 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8* 2554 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8* 2555 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 2556 // CHECK: [[TMP2:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8* 2557 // CHECK: [[TMP3:%.*]] = bitcast double* %a to i8* 2558 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0 2559 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0 2560 // CHECK: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16 2561 // CHECK: [[TMP5:%.*]] = bitcast <2 x double> [[TMP4]] to <16 x i8> 2562 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0 2563 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL1]], i64 0, i64 1 2564 // CHECK: [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16 2565 // CHECK: [[TMP7:%.*]] = bitcast <2 x double> [[TMP6]] to <16 x i8> 2566 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double> 2567 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x double> 2568 // CHECK: [[VLD2_LANE:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0i8(<2 x double> [[TMP8]], <2 x double> [[TMP9]], i64 1, i8* [[TMP3]]) 2569 // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <2 x double>, <2 x double> }* 2570 // CHECK: store { <2 x double>, <2 x double> } [[VLD2_LANE]], { <2 x double>, <2 x double> }* [[TMP10]] 2571 // CHECK: [[TMP11:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8* 2572 // CHECK: [[TMP12:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8* 2573 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) 2574 // CHECK: [[TMP13:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16 2575 // CHECK: ret %struct.float64x2x2_t [[TMP13]] 2576 float64x2x2_t test_vld2q_lane_f64(float64_t *a, float64x2x2_t b) { 2577 return vld2q_lane_f64(a, b, 1); 2578 } 2579 2580 // CHECK-LABEL: define %struct.poly16x8x2_t @test_vld2q_lane_p16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 { 2581 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16 2582 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16 2583 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16 2584 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16 2585 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0 2586 // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 2587 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8* 2588 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8* 2589 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 2590 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8* 2591 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 2592 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 2593 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0 2594 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 2595 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 2596 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 2597 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1 2598 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 2599 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 2600 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 2601 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 2602 // CHECK: [[VLD2_LANE:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i8(<8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i64 7, i8* [[TMP3]]) 2603 // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16> }* 2604 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], { <8 x i16>, <8 x i16> }* [[TMP10]] 2605 // CHECK: [[TMP11:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8* 2606 // CHECK: [[TMP12:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8* 2607 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) 2608 // CHECK: [[TMP13:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16 2609 // CHECK: ret %struct.poly16x8x2_t [[TMP13]] 2610 poly16x8x2_t test_vld2q_lane_p16(poly16_t *a, poly16x8x2_t b) { 2611 return vld2q_lane_p16(a, b, 7); 2612 } 2613 2614 // CHECK-LABEL: define %struct.poly64x2x2_t @test_vld2q_lane_p64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 { 2615 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16 2616 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16 2617 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16 2618 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16 2619 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[B]], i32 0, i32 0 2620 // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 2621 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8* 2622 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x2_t* [[B]] to i8* 2623 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 2624 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8* 2625 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* 2626 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0 2627 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0 2628 // CHECK: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 2629 // CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8> 2630 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0 2631 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1 2632 // CHECK: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 2633 // CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8> 2634 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64> 2635 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64> 2636 // CHECK: [[VLD2_LANE:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i8(<2 x i64> [[TMP8]], <2 x i64> [[TMP9]], i64 1, i8* [[TMP3]]) 2637 // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <2 x i64>, <2 x i64> }* 2638 // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2_LANE]], { <2 x i64>, <2 x i64> }* [[TMP10]] 2639 // CHECK: [[TMP11:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8* 2640 // CHECK: [[TMP12:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8* 2641 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) 2642 // CHECK: [[TMP13:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16 2643 // CHECK: ret %struct.poly64x2x2_t [[TMP13]] 2644 poly64x2x2_t test_vld2q_lane_p64(poly64_t *a, poly64x2x2_t b) { 2645 return vld2q_lane_p64(a, b, 1); 2646 } 2647 2648 // CHECK-LABEL: define %struct.uint8x8x2_t @test_vld2_lane_u8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { 2649 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8 2650 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8 2651 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8 2652 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8 2653 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0 2654 // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 2655 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8* 2656 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8* 2657 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 2658 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8* 2659 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 2660 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 2661 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 2662 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 2663 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1 2664 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 2665 // CHECK: [[VLD2_LANE:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i64 7, i8* %a) 2666 // CHECK: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to { <8 x i8>, <8 x i8> }* 2667 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE]], { <8 x i8>, <8 x i8> }* [[TMP5]] 2668 // CHECK: [[TMP6:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL]] to i8* 2669 // CHECK: [[TMP7:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8* 2670 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP6]], i8* [[TMP7]], i64 16, i32 8, i1 false) 2671 // CHECK: [[TMP8:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8 2672 // CHECK: ret %struct.uint8x8x2_t [[TMP8]] 2673 uint8x8x2_t test_vld2_lane_u8(uint8_t *a, uint8x8x2_t b) { 2674 return vld2_lane_u8(a, b, 7); 2675 } 2676 2677 // CHECK-LABEL: define %struct.uint16x4x2_t @test_vld2_lane_u16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { 2678 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8 2679 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8 2680 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8 2681 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8 2682 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0 2683 // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 2684 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8* 2685 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8* 2686 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 2687 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8* 2688 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 2689 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0 2690 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0 2691 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 2692 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 2693 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0 2694 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1 2695 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 2696 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 2697 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 2698 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 2699 // CHECK: [[VLD2_LANE:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i8(<4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i64 3, i8* [[TMP3]]) 2700 // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16> }* 2701 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], { <4 x i16>, <4 x i16> }* [[TMP10]] 2702 // CHECK: [[TMP11:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8* 2703 // CHECK: [[TMP12:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8* 2704 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) 2705 // CHECK: [[TMP13:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8 2706 // CHECK: ret %struct.uint16x4x2_t [[TMP13]] 2707 uint16x4x2_t test_vld2_lane_u16(uint16_t *a, uint16x4x2_t b) { 2708 return vld2_lane_u16(a, b, 3); 2709 } 2710 2711 // CHECK-LABEL: define %struct.uint32x2x2_t @test_vld2_lane_u32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 { 2712 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8 2713 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8 2714 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8 2715 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8 2716 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0 2717 // CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8 2718 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8* 2719 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8* 2720 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 2721 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8* 2722 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 2723 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0 2724 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0 2725 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 2726 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8> 2727 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0 2728 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1 2729 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 2730 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8> 2731 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32> 2732 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> 2733 // CHECK: [[VLD2_LANE:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i8(<2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i64 1, i8* [[TMP3]]) 2734 // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <2 x i32>, <2 x i32> }* 2735 // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_LANE]], { <2 x i32>, <2 x i32> }* [[TMP10]] 2736 // CHECK: [[TMP11:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8* 2737 // CHECK: [[TMP12:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8* 2738 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) 2739 // CHECK: [[TMP13:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8 2740 // CHECK: ret %struct.uint32x2x2_t [[TMP13]] 2741 uint32x2x2_t test_vld2_lane_u32(uint32_t *a, uint32x2x2_t b) { 2742 return vld2_lane_u32(a, b, 1); 2743 } 2744 2745 // CHECK-LABEL: define %struct.uint64x1x2_t @test_vld2_lane_u64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 { 2746 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8 2747 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8 2748 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8 2749 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8 2750 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0 2751 // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 2752 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8* 2753 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8* 2754 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 2755 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8* 2756 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* 2757 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0 2758 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0 2759 // CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 2760 // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> 2761 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0 2762 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1 2763 // CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 2764 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> 2765 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> 2766 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64> 2767 // CHECK: [[VLD2_LANE:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i8(<1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i64 0, i8* [[TMP3]]) 2768 // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <1 x i64>, <1 x i64> }* 2769 // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_LANE]], { <1 x i64>, <1 x i64> }* [[TMP10]] 2770 // CHECK: [[TMP11:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8* 2771 // CHECK: [[TMP12:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8* 2772 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) 2773 // CHECK: [[TMP13:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8 2774 // CHECK: ret %struct.uint64x1x2_t [[TMP13]] 2775 uint64x1x2_t test_vld2_lane_u64(uint64_t *a, uint64x1x2_t b) { 2776 return vld2_lane_u64(a, b, 0); 2777 } 2778 2779 // CHECK-LABEL: define %struct.int8x8x2_t @test_vld2_lane_s8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { 2780 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8 2781 // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8 2782 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8 2783 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8 2784 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0 2785 // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 2786 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8* 2787 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8* 2788 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 2789 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8* 2790 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 2791 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 2792 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 2793 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 2794 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1 2795 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 2796 // CHECK: [[VLD2_LANE:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i64 7, i8* %a) 2797 // CHECK: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to { <8 x i8>, <8 x i8> }* 2798 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE]], { <8 x i8>, <8 x i8> }* [[TMP5]] 2799 // CHECK: [[TMP6:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL]] to i8* 2800 // CHECK: [[TMP7:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8* 2801 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP6]], i8* [[TMP7]], i64 16, i32 8, i1 false) 2802 // CHECK: [[TMP8:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8 2803 // CHECK: ret %struct.int8x8x2_t [[TMP8]] 2804 int8x8x2_t test_vld2_lane_s8(int8_t *a, int8x8x2_t b) { 2805 return vld2_lane_s8(a, b, 7); 2806 } 2807 2808 // CHECK-LABEL: define %struct.int16x4x2_t @test_vld2_lane_s16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { 2809 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8 2810 // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8 2811 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8 2812 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8 2813 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0 2814 // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 2815 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8* 2816 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8* 2817 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 2818 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8* 2819 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 2820 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0 2821 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0 2822 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 2823 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 2824 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0 2825 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1 2826 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 2827 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 2828 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 2829 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 2830 // CHECK: [[VLD2_LANE:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i8(<4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i64 3, i8* [[TMP3]]) 2831 // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16> }* 2832 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], { <4 x i16>, <4 x i16> }* [[TMP10]] 2833 // CHECK: [[TMP11:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8* 2834 // CHECK: [[TMP12:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8* 2835 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) 2836 // CHECK: [[TMP13:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8 2837 // CHECK: ret %struct.int16x4x2_t [[TMP13]] 2838 int16x4x2_t test_vld2_lane_s16(int16_t *a, int16x4x2_t b) { 2839 return vld2_lane_s16(a, b, 3); 2840 } 2841 2842 // CHECK-LABEL: define %struct.int32x2x2_t @test_vld2_lane_s32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 { 2843 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8 2844 // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8 2845 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8 2846 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8 2847 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0 2848 // CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8 2849 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8* 2850 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8* 2851 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 2852 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8* 2853 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 2854 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0 2855 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0 2856 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 2857 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8> 2858 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0 2859 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1 2860 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 2861 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8> 2862 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32> 2863 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> 2864 // CHECK: [[VLD2_LANE:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i8(<2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i64 1, i8* [[TMP3]]) 2865 // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <2 x i32>, <2 x i32> }* 2866 // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_LANE]], { <2 x i32>, <2 x i32> }* [[TMP10]] 2867 // CHECK: [[TMP11:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8* 2868 // CHECK: [[TMP12:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8* 2869 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) 2870 // CHECK: [[TMP13:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8 2871 // CHECK: ret %struct.int32x2x2_t [[TMP13]] 2872 int32x2x2_t test_vld2_lane_s32(int32_t *a, int32x2x2_t b) { 2873 return vld2_lane_s32(a, b, 1); 2874 } 2875 2876 // CHECK-LABEL: define %struct.int64x1x2_t @test_vld2_lane_s64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 { 2877 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8 2878 // CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8 2879 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8 2880 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8 2881 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0 2882 // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 2883 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8* 2884 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8* 2885 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 2886 // CHECK: [[TMP2:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8* 2887 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* 2888 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0 2889 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0 2890 // CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 2891 // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> 2892 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0 2893 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1 2894 // CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 2895 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> 2896 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> 2897 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64> 2898 // CHECK: [[VLD2_LANE:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i8(<1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i64 0, i8* [[TMP3]]) 2899 // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <1 x i64>, <1 x i64> }* 2900 // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_LANE]], { <1 x i64>, <1 x i64> }* [[TMP10]] 2901 // CHECK: [[TMP11:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8* 2902 // CHECK: [[TMP12:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8* 2903 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) 2904 // CHECK: [[TMP13:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8 2905 // CHECK: ret %struct.int64x1x2_t [[TMP13]] 2906 int64x1x2_t test_vld2_lane_s64(int64_t *a, int64x1x2_t b) { 2907 return vld2_lane_s64(a, b, 0); 2908 } 2909 2910 // CHECK-LABEL: define %struct.float16x4x2_t @test_vld2_lane_f16(half* %a, [2 x <4 x half>] %b.coerce) #0 { 2911 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8 2912 // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8 2913 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8 2914 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8 2915 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0 2916 // CHECK: store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8 2917 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8* 2918 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8* 2919 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 2920 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8* 2921 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* 2922 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0 2923 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i64 0, i64 0 2924 // CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8 2925 // CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8> 2926 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0 2927 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1 2928 // CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 2929 // CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8> 2930 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 2931 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 2932 // CHECK: [[VLD2_LANE:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i8(<4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i64 3, i8* [[TMP3]]) 2933 // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16> }* 2934 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], { <4 x i16>, <4 x i16> }* [[TMP10]] 2935 // CHECK: [[TMP11:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8* 2936 // CHECK: [[TMP12:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8* 2937 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) 2938 // CHECK: [[TMP13:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8 2939 // CHECK: ret %struct.float16x4x2_t [[TMP13]] 2940 float16x4x2_t test_vld2_lane_f16(float16_t *a, float16x4x2_t b) { 2941 return vld2_lane_f16(a, b, 3); 2942 } 2943 2944 // CHECK-LABEL: define %struct.float32x2x2_t @test_vld2_lane_f32(float* %a, [2 x <2 x float>] %b.coerce) #0 { 2945 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8 2946 // CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8 2947 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8 2948 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8 2949 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0 2950 // CHECK: store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8 2951 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8* 2952 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8* 2953 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 2954 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* 2955 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* 2956 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0 2957 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i64 0, i64 0 2958 // CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 2959 // CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8> 2960 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0 2961 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i64 0, i64 1 2962 // CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 2963 // CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8> 2964 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float> 2965 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float> 2966 // CHECK: [[VLD2_LANE:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0i8(<2 x float> [[TMP8]], <2 x float> [[TMP9]], i64 1, i8* [[TMP3]]) 2967 // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <2 x float>, <2 x float> }* 2968 // CHECK: store { <2 x float>, <2 x float> } [[VLD2_LANE]], { <2 x float>, <2 x float> }* [[TMP10]] 2969 // CHECK: [[TMP11:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8* 2970 // CHECK: [[TMP12:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* 2971 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) 2972 // CHECK: [[TMP13:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8 2973 // CHECK: ret %struct.float32x2x2_t [[TMP13]] 2974 float32x2x2_t test_vld2_lane_f32(float32_t *a, float32x2x2_t b) { 2975 return vld2_lane_f32(a, b, 1); 2976 } 2977 2978 // CHECK-LABEL: define %struct.float64x1x2_t @test_vld2_lane_f64(double* %a, [2 x <1 x double>] %b.coerce) #0 { 2979 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8 2980 // CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8 2981 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8 2982 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8 2983 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0 2984 // CHECK: store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8 2985 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8* 2986 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8* 2987 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 2988 // CHECK: [[TMP2:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8* 2989 // CHECK: [[TMP3:%.*]] = bitcast double* %a to i8* 2990 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0 2991 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0 2992 // CHECK: [[TMP4:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8 2993 // CHECK: [[TMP5:%.*]] = bitcast <1 x double> [[TMP4]] to <8 x i8> 2994 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0 2995 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL1]], i64 0, i64 1 2996 // CHECK: [[TMP6:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8 2997 // CHECK: [[TMP7:%.*]] = bitcast <1 x double> [[TMP6]] to <8 x i8> 2998 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double> 2999 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x double> 3000 // CHECK: [[VLD2_LANE:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0i8(<1 x double> [[TMP8]], <1 x double> [[TMP9]], i64 0, i8* [[TMP3]]) 3001 // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <1 x double>, <1 x double> }* 3002 // CHECK: store { <1 x double>, <1 x double> } [[VLD2_LANE]], { <1 x double>, <1 x double> }* [[TMP10]] 3003 // CHECK: [[TMP11:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8* 3004 // CHECK: [[TMP12:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8* 3005 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) 3006 // CHECK: [[TMP13:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8 3007 // CHECK: ret %struct.float64x1x2_t [[TMP13]] 3008 float64x1x2_t test_vld2_lane_f64(float64_t *a, float64x1x2_t b) { 3009 return vld2_lane_f64(a, b, 0); 3010 } 3011 3012 // CHECK-LABEL: define %struct.poly8x8x2_t @test_vld2_lane_p8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { 3013 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8 3014 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8 3015 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8 3016 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8 3017 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0 3018 // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 3019 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8* 3020 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8* 3021 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 3022 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* 3023 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0 3024 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 3025 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 3026 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0 3027 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1 3028 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 3029 // CHECK: [[VLD2_LANE:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i64 7, i8* %a) 3030 // CHECK: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to { <8 x i8>, <8 x i8> }* 3031 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE]], { <8 x i8>, <8 x i8> }* [[TMP5]] 3032 // CHECK: [[TMP6:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL]] to i8* 3033 // CHECK: [[TMP7:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* 3034 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP6]], i8* [[TMP7]], i64 16, i32 8, i1 false) 3035 // CHECK: [[TMP8:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8 3036 // CHECK: ret %struct.poly8x8x2_t [[TMP8]] 3037 poly8x8x2_t test_vld2_lane_p8(poly8_t *a, poly8x8x2_t b) { 3038 return vld2_lane_p8(a, b, 7); 3039 } 3040 3041 // CHECK-LABEL: define %struct.poly16x4x2_t @test_vld2_lane_p16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { 3042 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8 3043 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8 3044 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8 3045 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8 3046 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0 3047 // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 3048 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8* 3049 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8* 3050 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 3051 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* 3052 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 3053 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0 3054 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0 3055 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 3056 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 3057 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0 3058 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1 3059 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 3060 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 3061 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 3062 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 3063 // CHECK: [[VLD2_LANE:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i8(<4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i64 3, i8* [[TMP3]]) 3064 // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16> }* 3065 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], { <4 x i16>, <4 x i16> }* [[TMP10]] 3066 // CHECK: [[TMP11:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8* 3067 // CHECK: [[TMP12:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* 3068 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) 3069 // CHECK: [[TMP13:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8 3070 // CHECK: ret %struct.poly16x4x2_t [[TMP13]] 3071 poly16x4x2_t test_vld2_lane_p16(poly16_t *a, poly16x4x2_t b) { 3072 return vld2_lane_p16(a, b, 3); 3073 } 3074 3075 // CHECK-LABEL: define %struct.poly64x1x2_t @test_vld2_lane_p64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 { 3076 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8 3077 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8 3078 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8 3079 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8 3080 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[B]], i32 0, i32 0 3081 // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 3082 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8* 3083 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x2_t* [[B]] to i8* 3084 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 3085 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8* 3086 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* 3087 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0 3088 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0 3089 // CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 3090 // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> 3091 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0 3092 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1 3093 // CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 3094 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> 3095 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> 3096 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64> 3097 // CHECK: [[VLD2_LANE:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i8(<1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i64 0, i8* [[TMP3]]) 3098 // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <1 x i64>, <1 x i64> }* 3099 // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_LANE]], { <1 x i64>, <1 x i64> }* [[TMP10]] 3100 // CHECK: [[TMP11:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8* 3101 // CHECK: [[TMP12:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8* 3102 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) 3103 // CHECK: [[TMP13:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8 3104 // CHECK: ret %struct.poly64x1x2_t [[TMP13]] 3105 poly64x1x2_t test_vld2_lane_p64(poly64_t *a, poly64x1x2_t b) { 3106 return vld2_lane_p64(a, b, 0); 3107 } 3108 3109 // CHECK-LABEL: define %struct.uint16x8x3_t @test_vld3q_lane_u16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 { 3110 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16 3111 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16 3112 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16 3113 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16 3114 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0 3115 // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 3116 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8* 3117 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8* 3118 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 3119 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* 3120 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 3121 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 3122 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0 3123 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 3124 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 3125 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 3126 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1 3127 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 3128 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 3129 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 3130 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2 3131 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 3132 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> 3133 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 3134 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 3135 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> 3136 // CHECK: [[VLD3_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i8(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i64 7, i8* [[TMP3]]) 3137 // CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16>, <8 x i16> }* 3138 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP13]] 3139 // CHECK: [[TMP14:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8* 3140 // CHECK: [[TMP15:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* 3141 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) 3142 // CHECK: [[TMP16:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16 3143 // CHECK: ret %struct.uint16x8x3_t [[TMP16]] 3144 uint16x8x3_t test_vld3q_lane_u16(uint16_t *a, uint16x8x3_t b) { 3145 return vld3q_lane_u16(a, b, 7); 3146 } 3147 3148 // CHECK-LABEL: define %struct.uint32x4x3_t @test_vld3q_lane_u32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 { 3149 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16 3150 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16 3151 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16 3152 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16 3153 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0 3154 // CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16 3155 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8* 3156 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8* 3157 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 3158 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* 3159 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 3160 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 3161 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0 3162 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 3163 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> 3164 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 3165 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1 3166 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 3167 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> 3168 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 3169 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2 3170 // CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 3171 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8> 3172 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> 3173 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> 3174 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32> 3175 // CHECK: [[VLD3_LANE:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i8(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i64 3, i8* [[TMP3]]) 3176 // CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <4 x i32>, <4 x i32>, <4 x i32> }* 3177 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3_LANE]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP13]] 3178 // CHECK: [[TMP14:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8* 3179 // CHECK: [[TMP15:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* 3180 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) 3181 // CHECK: [[TMP16:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16 3182 // CHECK: ret %struct.uint32x4x3_t [[TMP16]] 3183 uint32x4x3_t test_vld3q_lane_u32(uint32_t *a, uint32x4x3_t b) { 3184 return vld3q_lane_u32(a, b, 3); 3185 } 3186 3187 // CHECK-LABEL: define %struct.uint64x2x3_t @test_vld3q_lane_u64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 { 3188 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16 3189 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16 3190 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16 3191 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16 3192 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[B]], i32 0, i32 0 3193 // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 3194 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8* 3195 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8* 3196 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 3197 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8* 3198 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* 3199 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0 3200 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0 3201 // CHECK: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 3202 // CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8> 3203 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0 3204 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1 3205 // CHECK: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 3206 // CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8> 3207 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0 3208 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2 3209 // CHECK: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 3210 // CHECK: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP8]] to <16 x i8> 3211 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64> 3212 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64> 3213 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x i64> 3214 // CHECK: [[VLD3_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i8(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]], <2 x i64> [[TMP12]], i64 1, i8* [[TMP3]]) 3215 // CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <2 x i64>, <2 x i64>, <2 x i64> }* 3216 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP13]] 3217 // CHECK: [[TMP14:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8* 3218 // CHECK: [[TMP15:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8* 3219 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) 3220 // CHECK: [[TMP16:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16 3221 // CHECK: ret %struct.uint64x2x3_t [[TMP16]] 3222 uint64x2x3_t test_vld3q_lane_u64(uint64_t *a, uint64x2x3_t b) { 3223 return vld3q_lane_u64(a, b, 1); 3224 } 3225 3226 // CHECK-LABEL: define %struct.int16x8x3_t @test_vld3q_lane_s16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 { 3227 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16 3228 // CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16 3229 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16 3230 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16 3231 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0 3232 // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 3233 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8* 3234 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8* 3235 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 3236 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8* 3237 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 3238 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 3239 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0 3240 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 3241 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 3242 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 3243 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1 3244 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 3245 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 3246 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 3247 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2 3248 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 3249 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> 3250 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 3251 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 3252 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> 3253 // CHECK: [[VLD3_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i8(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i64 7, i8* [[TMP3]]) 3254 // CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16>, <8 x i16> }* 3255 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP13]] 3256 // CHECK: [[TMP14:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8* 3257 // CHECK: [[TMP15:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8* 3258 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) 3259 // CHECK: [[TMP16:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16 3260 // CHECK: ret %struct.int16x8x3_t [[TMP16]] 3261 int16x8x3_t test_vld3q_lane_s16(int16_t *a, int16x8x3_t b) { 3262 return vld3q_lane_s16(a, b, 7); 3263 } 3264 3265 // CHECK-LABEL: define %struct.int32x4x3_t @test_vld3q_lane_s32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 { 3266 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16 3267 // CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16 3268 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16 3269 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16 3270 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0 3271 // CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16 3272 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8* 3273 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8* 3274 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 3275 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* 3276 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 3277 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 3278 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0 3279 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 3280 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> 3281 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 3282 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1 3283 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 3284 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> 3285 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 3286 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2 3287 // CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 3288 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8> 3289 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> 3290 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> 3291 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32> 3292 // CHECK: [[VLD3_LANE:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i8(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i64 3, i8* [[TMP3]]) 3293 // CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <4 x i32>, <4 x i32>, <4 x i32> }* 3294 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3_LANE]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP13]] 3295 // CHECK: [[TMP14:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8* 3296 // CHECK: [[TMP15:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* 3297 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) 3298 // CHECK: [[TMP16:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16 3299 // CHECK: ret %struct.int32x4x3_t [[TMP16]] 3300 int32x4x3_t test_vld3q_lane_s32(int32_t *a, int32x4x3_t b) { 3301 return vld3q_lane_s32(a, b, 3); 3302 } 3303 3304 // CHECK-LABEL: define %struct.int64x2x3_t @test_vld3q_lane_s64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 { 3305 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16 3306 // CHECK: [[B:%.*]] = alloca %struct.int64x2x3_t, align 16 3307 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16 3308 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16 3309 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[B]], i32 0, i32 0 3310 // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 3311 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8* 3312 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8* 3313 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 3314 // CHECK: [[TMP2:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8* 3315 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* 3316 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0 3317 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0 3318 // CHECK: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 3319 // CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8> 3320 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0 3321 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1 3322 // CHECK: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 3323 // CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8> 3324 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0 3325 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2 3326 // CHECK: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 3327 // CHECK: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP8]] to <16 x i8> 3328 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64> 3329 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64> 3330 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x i64> 3331 // CHECK: [[VLD3_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i8(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]], <2 x i64> [[TMP12]], i64 1, i8* [[TMP3]]) 3332 // CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <2 x i64>, <2 x i64>, <2 x i64> }* 3333 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP13]] 3334 // CHECK: [[TMP14:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8* 3335 // CHECK: [[TMP15:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8* 3336 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) 3337 // CHECK: [[TMP16:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16 3338 // CHECK: ret %struct.int64x2x3_t [[TMP16]] 3339 int64x2x3_t test_vld3q_lane_s64(int64_t *a, int64x2x3_t b) { 3340 return vld3q_lane_s64(a, b, 1); 3341 } 3342 3343 // CHECK-LABEL: define %struct.float16x8x3_t @test_vld3q_lane_f16(half* %a, [3 x <8 x half>] %b.coerce) #0 { 3344 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16 3345 // CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16 3346 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16 3347 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16 3348 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0 3349 // CHECK: store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16 3350 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8* 3351 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8* 3352 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 3353 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* 3354 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* 3355 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 3356 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i64 0, i64 0 3357 // CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 3358 // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> 3359 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 3360 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i64 0, i64 1 3361 // CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 3362 // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8> 3363 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 3364 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2 3365 // CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16 3366 // CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8> 3367 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 3368 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 3369 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> 3370 // CHECK: [[VLD3_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i8(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i64 7, i8* [[TMP3]]) 3371 // CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16>, <8 x i16> }* 3372 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP13]] 3373 // CHECK: [[TMP14:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8* 3374 // CHECK: [[TMP15:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* 3375 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) 3376 // CHECK: [[TMP16:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16 3377 // CHECK: ret %struct.float16x8x3_t [[TMP16]] 3378 float16x8x3_t test_vld3q_lane_f16(float16_t *a, float16x8x3_t b) { 3379 return vld3q_lane_f16(a, b, 7); 3380 } 3381 3382 // CHECK-LABEL: define %struct.float32x4x3_t @test_vld3q_lane_f32(float* %a, [3 x <4 x float>] %b.coerce) #0 { 3383 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16 3384 // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16 3385 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16 3386 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16 3387 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0 3388 // CHECK: store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16 3389 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8* 3390 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8* 3391 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 3392 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* 3393 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* 3394 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 3395 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i64 0, i64 0 3396 // CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 3397 // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8> 3398 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 3399 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i64 0, i64 1 3400 // CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 3401 // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8> 3402 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 3403 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i64 0, i64 2 3404 // CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16 3405 // CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8> 3406 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float> 3407 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float> 3408 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float> 3409 // CHECK: [[VLD3_LANE:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0i8(<4 x float> [[TMP10]], <4 x float> [[TMP11]], <4 x float> [[TMP12]], i64 3, i8* [[TMP3]]) 3410 // CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <4 x float>, <4 x float>, <4 x float> }* 3411 // CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3_LANE]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP13]] 3412 // CHECK: [[TMP14:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8* 3413 // CHECK: [[TMP15:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* 3414 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) 3415 // CHECK: [[TMP16:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16 3416 // CHECK: ret %struct.float32x4x3_t [[TMP16]] 3417 float32x4x3_t test_vld3q_lane_f32(float32_t *a, float32x4x3_t b) { 3418 return vld3q_lane_f32(a, b, 3); 3419 } 3420 3421 // CHECK-LABEL: define %struct.float64x2x3_t @test_vld3q_lane_f64(double* %a, [3 x <2 x double>] %b.coerce) #0 { 3422 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16 3423 // CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16 3424 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16 3425 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16 3426 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0 3427 // CHECK: store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16 3428 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8* 3429 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8* 3430 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 3431 // CHECK: [[TMP2:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8* 3432 // CHECK: [[TMP3:%.*]] = bitcast double* %a to i8* 3433 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0 3434 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0 3435 // CHECK: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16 3436 // CHECK: [[TMP5:%.*]] = bitcast <2 x double> [[TMP4]] to <16 x i8> 3437 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0 3438 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL1]], i64 0, i64 1 3439 // CHECK: [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16 3440 // CHECK: [[TMP7:%.*]] = bitcast <2 x double> [[TMP6]] to <16 x i8> 3441 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0 3442 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL3]], i64 0, i64 2 3443 // CHECK: [[TMP8:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16 3444 // CHECK: [[TMP9:%.*]] = bitcast <2 x double> [[TMP8]] to <16 x i8> 3445 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double> 3446 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x double> 3447 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x double> 3448 // CHECK: [[VLD3_LANE:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0i8(<2 x double> [[TMP10]], <2 x double> [[TMP11]], <2 x double> [[TMP12]], i64 1, i8* [[TMP3]]) 3449 // CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <2 x double>, <2 x double>, <2 x double> }* 3450 // CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD3_LANE]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP13]] 3451 // CHECK: [[TMP14:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8* 3452 // CHECK: [[TMP15:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8* 3453 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) 3454 // CHECK: [[TMP16:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16 3455 // CHECK: ret %struct.float64x2x3_t [[TMP16]] 3456 float64x2x3_t test_vld3q_lane_f64(float64_t *a, float64x2x3_t b) { 3457 return vld3q_lane_f64(a, b, 1); 3458 } 3459 3460 // CHECK-LABEL: define %struct.poly8x16x3_t @test_vld3q_lane_p8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 { 3461 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16 3462 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16 3463 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16 3464 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16 3465 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0 3466 // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 3467 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8* 3468 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8* 3469 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 3470 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8* 3471 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0 3472 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 3473 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 3474 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0 3475 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1 3476 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 3477 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0 3478 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2 3479 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 3480 // CHECK: [[VLD3_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i64 15, i8* %a) 3481 // CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to { <16 x i8>, <16 x i8>, <16 x i8> }* 3482 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP6]] 3483 // CHECK: [[TMP7:%.*]] = bitcast %struct.poly8x16x3_t* [[RETVAL]] to i8* 3484 // CHECK: [[TMP8:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8* 3485 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 48, i32 16, i1 false) 3486 // CHECK: [[TMP9:%.*]] = load %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[RETVAL]], align 16 3487 // CHECK: ret %struct.poly8x16x3_t [[TMP9]] 3488 poly8x16x3_t test_vld3q_lane_p8(poly8_t *a, poly8x16x3_t b) { 3489 return vld3q_lane_p8(a, b, 15); 3490 } 3491 3492 // CHECK-LABEL: define %struct.poly16x8x3_t @test_vld3q_lane_p16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 { 3493 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16 3494 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16 3495 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16 3496 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16 3497 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0 3498 // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 3499 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8* 3500 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8* 3501 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 3502 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* 3503 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 3504 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 3505 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0 3506 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 3507 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 3508 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 3509 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1 3510 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 3511 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 3512 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 3513 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2 3514 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 3515 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> 3516 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 3517 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 3518 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> 3519 // CHECK: [[VLD3_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i8(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i64 7, i8* [[TMP3]]) 3520 // CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16>, <8 x i16> }* 3521 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP13]] 3522 // CHECK: [[TMP14:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8* 3523 // CHECK: [[TMP15:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* 3524 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) 3525 // CHECK: [[TMP16:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16 3526 // CHECK: ret %struct.poly16x8x3_t [[TMP16]] 3527 poly16x8x3_t test_vld3q_lane_p16(poly16_t *a, poly16x8x3_t b) { 3528 return vld3q_lane_p16(a, b, 7); 3529 } 3530 3531 // CHECK-LABEL: define %struct.poly64x2x3_t @test_vld3q_lane_p64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 { 3532 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16 3533 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16 3534 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16 3535 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16 3536 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[B]], i32 0, i32 0 3537 // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 3538 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8* 3539 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x3_t* [[B]] to i8* 3540 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 3541 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8* 3542 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* 3543 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0 3544 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0 3545 // CHECK: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 3546 // CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8> 3547 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0 3548 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1 3549 // CHECK: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 3550 // CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8> 3551 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0 3552 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2 3553 // CHECK: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 3554 // CHECK: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP8]] to <16 x i8> 3555 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64> 3556 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64> 3557 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x i64> 3558 // CHECK: [[VLD3_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i8(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]], <2 x i64> [[TMP12]], i64 1, i8* [[TMP3]]) 3559 // CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <2 x i64>, <2 x i64>, <2 x i64> }* 3560 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP13]] 3561 // CHECK: [[TMP14:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8* 3562 // CHECK: [[TMP15:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8* 3563 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false) 3564 // CHECK: [[TMP16:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16 3565 // CHECK: ret %struct.poly64x2x3_t [[TMP16]] 3566 poly64x2x3_t test_vld3q_lane_p64(poly64_t *a, poly64x2x3_t b) { 3567 return vld3q_lane_p64(a, b, 1); 3568 } 3569 3570 // CHECK-LABEL: define %struct.uint8x8x3_t @test_vld3_lane_u8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 { 3571 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8 3572 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8 3573 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8 3574 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8 3575 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0 3576 // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 3577 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8* 3578 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8* 3579 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 3580 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* 3581 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 3582 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 3583 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 3584 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 3585 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1 3586 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 3587 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 3588 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2 3589 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 3590 // CHECK: [[VLD3_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i64 7, i8* %a) 3591 // CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to { <8 x i8>, <8 x i8>, <8 x i8> }* 3592 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP6]] 3593 // CHECK: [[TMP7:%.*]] = bitcast %struct.uint8x8x3_t* [[RETVAL]] to i8* 3594 // CHECK: [[TMP8:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* 3595 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 24, i32 8, i1 false) 3596 // CHECK: [[TMP9:%.*]] = load %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[RETVAL]], align 8 3597 // CHECK: ret %struct.uint8x8x3_t [[TMP9]] 3598 uint8x8x3_t test_vld3_lane_u8(uint8_t *a, uint8x8x3_t b) { 3599 return vld3_lane_u8(a, b, 7); 3600 } 3601 3602 // CHECK-LABEL: define %struct.uint16x4x3_t @test_vld3_lane_u16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 { 3603 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8 3604 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8 3605 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8 3606 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8 3607 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0 3608 // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 3609 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8* 3610 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8* 3611 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 3612 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* 3613 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 3614 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 3615 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0 3616 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 3617 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 3618 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 3619 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1 3620 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 3621 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 3622 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 3623 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2 3624 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 3625 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8> 3626 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 3627 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 3628 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 3629 // CHECK: [[VLD3_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i8(<4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i64 3, i8* [[TMP3]]) 3630 // CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16>, <4 x i16> }* 3631 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP13]] 3632 // CHECK: [[TMP14:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8* 3633 // CHECK: [[TMP15:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* 3634 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) 3635 // CHECK: [[TMP16:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8 3636 // CHECK: ret %struct.uint16x4x3_t [[TMP16]] 3637 uint16x4x3_t test_vld3_lane_u16(uint16_t *a, uint16x4x3_t b) { 3638 return vld3_lane_u16(a, b, 3); 3639 } 3640 3641 // CHECK-LABEL: define %struct.uint32x2x3_t @test_vld3_lane_u32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 { 3642 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8 3643 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8 3644 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8 3645 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8 3646 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0 3647 // CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8 3648 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8* 3649 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8* 3650 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 3651 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* 3652 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 3653 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 3654 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0 3655 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 3656 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8> 3657 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 3658 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1 3659 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 3660 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8> 3661 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 3662 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2 3663 // CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 3664 // CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8> 3665 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32> 3666 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> 3667 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32> 3668 // CHECK: [[VLD3_LANE:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i8(<2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i64 1, i8* [[TMP3]]) 3669 // CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <2 x i32>, <2 x i32>, <2 x i32> }* 3670 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP13]] 3671 // CHECK: [[TMP14:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8* 3672 // CHECK: [[TMP15:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* 3673 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) 3674 // CHECK: [[TMP16:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8 3675 // CHECK: ret %struct.uint32x2x3_t [[TMP16]] 3676 uint32x2x3_t test_vld3_lane_u32(uint32_t *a, uint32x2x3_t b) { 3677 return vld3_lane_u32(a, b, 1); 3678 } 3679 3680 // CHECK-LABEL: define %struct.uint64x1x3_t @test_vld3_lane_u64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 { 3681 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8 3682 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8 3683 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8 3684 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8 3685 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0 3686 // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 3687 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8* 3688 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8* 3689 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 3690 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* 3691 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* 3692 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 3693 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0 3694 // CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 3695 // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> 3696 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 3697 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1 3698 // CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 3699 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> 3700 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 3701 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2 3702 // CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 3703 // CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> 3704 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> 3705 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64> 3706 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> 3707 // CHECK: [[VLD3_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i8(<1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i64 0, i8* [[TMP3]]) 3708 // CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <1 x i64>, <1 x i64>, <1 x i64> }* 3709 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP13]] 3710 // CHECK: [[TMP14:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8* 3711 // CHECK: [[TMP15:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* 3712 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) 3713 // CHECK: [[TMP16:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8 3714 // CHECK: ret %struct.uint64x1x3_t [[TMP16]] 3715 uint64x1x3_t test_vld3_lane_u64(uint64_t *a, uint64x1x3_t b) { 3716 return vld3_lane_u64(a, b, 0); 3717 } 3718 3719 // CHECK-LABEL: define %struct.int8x8x3_t @test_vld3_lane_s8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 { 3720 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8 3721 // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8 3722 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8 3723 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8 3724 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0 3725 // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 3726 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8* 3727 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8* 3728 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 3729 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* 3730 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 3731 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 3732 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 3733 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 3734 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1 3735 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 3736 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 3737 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2 3738 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 3739 // CHECK: [[VLD3_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i64 7, i8* %a) 3740 // CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to { <8 x i8>, <8 x i8>, <8 x i8> }* 3741 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP6]] 3742 // CHECK: [[TMP7:%.*]] = bitcast %struct.int8x8x3_t* [[RETVAL]] to i8* 3743 // CHECK: [[TMP8:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* 3744 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 24, i32 8, i1 false) 3745 // CHECK: [[TMP9:%.*]] = load %struct.int8x8x3_t, %struct.int8x8x3_t* [[RETVAL]], align 8 3746 // CHECK: ret %struct.int8x8x3_t [[TMP9]] 3747 int8x8x3_t test_vld3_lane_s8(int8_t *a, int8x8x3_t b) { 3748 return vld3_lane_s8(a, b, 7); 3749 } 3750 3751 // CHECK-LABEL: define %struct.int16x4x3_t @test_vld3_lane_s16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 { 3752 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8 3753 // CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8 3754 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8 3755 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8 3756 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0 3757 // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 3758 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8* 3759 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8* 3760 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 3761 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* 3762 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 3763 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 3764 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0 3765 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 3766 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 3767 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 3768 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1 3769 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 3770 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 3771 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 3772 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2 3773 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 3774 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8> 3775 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 3776 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 3777 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 3778 // CHECK: [[VLD3_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i8(<4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i64 3, i8* [[TMP3]]) 3779 // CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16>, <4 x i16> }* 3780 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP13]] 3781 // CHECK: [[TMP14:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8* 3782 // CHECK: [[TMP15:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* 3783 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) 3784 // CHECK: [[TMP16:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8 3785 // CHECK: ret %struct.int16x4x3_t [[TMP16]] 3786 int16x4x3_t test_vld3_lane_s16(int16_t *a, int16x4x3_t b) { 3787 return vld3_lane_s16(a, b, 3); 3788 } 3789 3790 // CHECK-LABEL: define %struct.int32x2x3_t @test_vld3_lane_s32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 { 3791 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8 3792 // CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8 3793 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8 3794 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8 3795 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0 3796 // CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8 3797 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8* 3798 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8* 3799 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 3800 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* 3801 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 3802 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 3803 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0 3804 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 3805 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8> 3806 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 3807 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1 3808 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 3809 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8> 3810 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 3811 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2 3812 // CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 3813 // CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8> 3814 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32> 3815 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> 3816 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32> 3817 // CHECK: [[VLD3_LANE:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i8(<2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i64 1, i8* [[TMP3]]) 3818 // CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <2 x i32>, <2 x i32>, <2 x i32> }* 3819 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP13]] 3820 // CHECK: [[TMP14:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8* 3821 // CHECK: [[TMP15:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* 3822 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) 3823 // CHECK: [[TMP16:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8 3824 // CHECK: ret %struct.int32x2x3_t [[TMP16]] 3825 int32x2x3_t test_vld3_lane_s32(int32_t *a, int32x2x3_t b) { 3826 return vld3_lane_s32(a, b, 1); 3827 } 3828 3829 // CHECK-LABEL: define %struct.int64x1x3_t @test_vld3_lane_s64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 { 3830 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8 3831 // CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8 3832 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8 3833 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8 3834 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0 3835 // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 3836 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8* 3837 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8* 3838 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 3839 // CHECK: [[TMP2:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8* 3840 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* 3841 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 3842 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0 3843 // CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 3844 // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> 3845 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 3846 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1 3847 // CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 3848 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> 3849 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 3850 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2 3851 // CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 3852 // CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> 3853 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> 3854 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64> 3855 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> 3856 // CHECK: [[VLD3_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i8(<1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i64 0, i8* [[TMP3]]) 3857 // CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <1 x i64>, <1 x i64>, <1 x i64> }* 3858 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP13]] 3859 // CHECK: [[TMP14:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8* 3860 // CHECK: [[TMP15:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8* 3861 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) 3862 // CHECK: [[TMP16:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8 3863 // CHECK: ret %struct.int64x1x3_t [[TMP16]] 3864 int64x1x3_t test_vld3_lane_s64(int64_t *a, int64x1x3_t b) { 3865 return vld3_lane_s64(a, b, 0); 3866 } 3867 3868 // CHECK-LABEL: define %struct.float16x4x3_t @test_vld3_lane_f16(half* %a, [3 x <4 x half>] %b.coerce) #0 { 3869 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8 3870 // CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8 3871 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8 3872 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8 3873 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0 3874 // CHECK: store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8 3875 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8* 3876 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8* 3877 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 3878 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* 3879 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* 3880 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 3881 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i64 0, i64 0 3882 // CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8 3883 // CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8> 3884 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 3885 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i64 0, i64 1 3886 // CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 3887 // CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8> 3888 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 3889 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2 3890 // CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8 3891 // CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8> 3892 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 3893 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 3894 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 3895 // CHECK: [[VLD3_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i8(<4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i64 3, i8* [[TMP3]]) 3896 // CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16>, <4 x i16> }* 3897 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP13]] 3898 // CHECK: [[TMP14:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8* 3899 // CHECK: [[TMP15:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* 3900 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) 3901 // CHECK: [[TMP16:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8 3902 // CHECK: ret %struct.float16x4x3_t [[TMP16]] 3903 float16x4x3_t test_vld3_lane_f16(float16_t *a, float16x4x3_t b) { 3904 return vld3_lane_f16(a, b, 3); 3905 } 3906 3907 // CHECK-LABEL: define %struct.float32x2x3_t @test_vld3_lane_f32(float* %a, [3 x <2 x float>] %b.coerce) #0 { 3908 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8 3909 // CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8 3910 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8 3911 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8 3912 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0 3913 // CHECK: store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8 3914 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8* 3915 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8* 3916 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 3917 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* 3918 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* 3919 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 3920 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i64 0, i64 0 3921 // CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 3922 // CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8> 3923 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 3924 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i64 0, i64 1 3925 // CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 3926 // CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8> 3927 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 3928 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i64 0, i64 2 3929 // CHECK: [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8 3930 // CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8> 3931 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float> 3932 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float> 3933 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float> 3934 // CHECK: [[VLD3_LANE:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0i8(<2 x float> [[TMP10]], <2 x float> [[TMP11]], <2 x float> [[TMP12]], i64 1, i8* [[TMP3]]) 3935 // CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <2 x float>, <2 x float>, <2 x float> }* 3936 // CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3_LANE]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP13]] 3937 // CHECK: [[TMP14:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8* 3938 // CHECK: [[TMP15:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* 3939 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) 3940 // CHECK: [[TMP16:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8 3941 // CHECK: ret %struct.float32x2x3_t [[TMP16]] 3942 float32x2x3_t test_vld3_lane_f32(float32_t *a, float32x2x3_t b) { 3943 return vld3_lane_f32(a, b, 1); 3944 } 3945 3946 // CHECK-LABEL: define %struct.float64x1x3_t @test_vld3_lane_f64(double* %a, [3 x <1 x double>] %b.coerce) #0 { 3947 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8 3948 // CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8 3949 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8 3950 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8 3951 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0 3952 // CHECK: store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8 3953 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8* 3954 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8* 3955 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 3956 // CHECK: [[TMP2:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8* 3957 // CHECK: [[TMP3:%.*]] = bitcast double* %a to i8* 3958 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0 3959 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0 3960 // CHECK: [[TMP4:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8 3961 // CHECK: [[TMP5:%.*]] = bitcast <1 x double> [[TMP4]] to <8 x i8> 3962 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0 3963 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL1]], i64 0, i64 1 3964 // CHECK: [[TMP6:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8 3965 // CHECK: [[TMP7:%.*]] = bitcast <1 x double> [[TMP6]] to <8 x i8> 3966 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0 3967 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL3]], i64 0, i64 2 3968 // CHECK: [[TMP8:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8 3969 // CHECK: [[TMP9:%.*]] = bitcast <1 x double> [[TMP8]] to <8 x i8> 3970 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double> 3971 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x double> 3972 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x double> 3973 // CHECK: [[VLD3_LANE:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0i8(<1 x double> [[TMP10]], <1 x double> [[TMP11]], <1 x double> [[TMP12]], i64 0, i8* [[TMP3]]) 3974 // CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <1 x double>, <1 x double>, <1 x double> }* 3975 // CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD3_LANE]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP13]] 3976 // CHECK: [[TMP14:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8* 3977 // CHECK: [[TMP15:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8* 3978 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) 3979 // CHECK: [[TMP16:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8 3980 // CHECK: ret %struct.float64x1x3_t [[TMP16]] 3981 float64x1x3_t test_vld3_lane_f64(float64_t *a, float64x1x3_t b) { 3982 return vld3_lane_f64(a, b, 0); 3983 } 3984 3985 // CHECK-LABEL: define %struct.poly8x8x3_t @test_vld3_lane_p8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 { 3986 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8 3987 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8 3988 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8 3989 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8 3990 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0 3991 // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 3992 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8* 3993 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8* 3994 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 3995 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* 3996 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 3997 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 3998 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 3999 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 4000 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1 4001 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 4002 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 4003 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2 4004 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 4005 // CHECK: [[VLD3_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i64 7, i8* %a) 4006 // CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to { <8 x i8>, <8 x i8>, <8 x i8> }* 4007 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP6]] 4008 // CHECK: [[TMP7:%.*]] = bitcast %struct.poly8x8x3_t* [[RETVAL]] to i8* 4009 // CHECK: [[TMP8:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* 4010 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 24, i32 8, i1 false) 4011 // CHECK: [[TMP9:%.*]] = load %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[RETVAL]], align 8 4012 // CHECK: ret %struct.poly8x8x3_t [[TMP9]] 4013 poly8x8x3_t test_vld3_lane_p8(poly8_t *a, poly8x8x3_t b) { 4014 return vld3_lane_p8(a, b, 7); 4015 } 4016 4017 // CHECK-LABEL: define %struct.poly16x4x3_t @test_vld3_lane_p16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 { 4018 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8 4019 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8 4020 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8 4021 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8 4022 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0 4023 // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 4024 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8* 4025 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8* 4026 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 4027 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* 4028 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 4029 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 4030 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0 4031 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 4032 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 4033 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 4034 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1 4035 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 4036 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 4037 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 4038 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2 4039 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 4040 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8> 4041 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 4042 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 4043 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 4044 // CHECK: [[VLD3_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i8(<4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i64 3, i8* [[TMP3]]) 4045 // CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16>, <4 x i16> }* 4046 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP13]] 4047 // CHECK: [[TMP14:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8* 4048 // CHECK: [[TMP15:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* 4049 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) 4050 // CHECK: [[TMP16:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8 4051 // CHECK: ret %struct.poly16x4x3_t [[TMP16]] 4052 poly16x4x3_t test_vld3_lane_p16(poly16_t *a, poly16x4x3_t b) { 4053 return vld3_lane_p16(a, b, 3); 4054 } 4055 4056 // CHECK-LABEL: define %struct.poly64x1x3_t @test_vld3_lane_p64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 { 4057 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8 4058 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8 4059 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8 4060 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8 4061 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[B]], i32 0, i32 0 4062 // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 4063 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8* 4064 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x3_t* [[B]] to i8* 4065 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 4066 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8* 4067 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* 4068 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0 4069 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0 4070 // CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 4071 // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> 4072 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0 4073 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1 4074 // CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 4075 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> 4076 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0 4077 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2 4078 // CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 4079 // CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> 4080 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> 4081 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64> 4082 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> 4083 // CHECK: [[VLD3_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i8(<1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i64 0, i8* [[TMP3]]) 4084 // CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <1 x i64>, <1 x i64>, <1 x i64> }* 4085 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP13]] 4086 // CHECK: [[TMP14:%.*]] = bitcast %struct.poly64x1x3_t* [[RETVAL]] to i8* 4087 // CHECK: [[TMP15:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8* 4088 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) 4089 // CHECK: [[TMP16:%.*]] = load %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[RETVAL]], align 8 4090 // CHECK: ret %struct.poly64x1x3_t [[TMP16]] 4091 poly64x1x3_t test_vld3_lane_p64(poly64_t *a, poly64x1x3_t b) { 4092 return vld3_lane_p64(a, b, 0); 4093 } 4094 4095 // CHECK-LABEL: define %struct.uint8x16x4_t @test_vld4q_lane_u8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 { 4096 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16 4097 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16 4098 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16 4099 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16 4100 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0 4101 // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 4102 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8* 4103 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8* 4104 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 4105 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* 4106 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 4107 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 4108 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 4109 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 4110 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1 4111 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 4112 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 4113 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2 4114 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 4115 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 4116 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3 4117 // CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16 4118 // CHECK: [[VLD4_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i64 15, i8* %a) 4119 // CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP2]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* 4120 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP7]] 4121 // CHECK: [[TMP8:%.*]] = bitcast %struct.uint8x16x4_t* [[RETVAL]] to i8* 4122 // CHECK: [[TMP9:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* 4123 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 64, i32 16, i1 false) 4124 // CHECK: [[TMP10:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16 4125 // CHECK: ret %struct.uint8x16x4_t [[TMP10]] 4126 uint8x16x4_t test_vld4q_lane_u8(uint8_t *a, uint8x16x4_t b) { 4127 return vld4q_lane_u8(a, b, 15); 4128 } 4129 4130 // CHECK-LABEL: define %struct.uint16x8x4_t @test_vld4q_lane_u16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 { 4131 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16 4132 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16 4133 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16 4134 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16 4135 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0 4136 // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 4137 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8* 4138 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8* 4139 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 4140 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* 4141 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 4142 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 4143 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0 4144 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 4145 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 4146 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 4147 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1 4148 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 4149 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 4150 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 4151 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2 4152 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 4153 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> 4154 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 4155 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3 4156 // CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 4157 // CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8> 4158 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 4159 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 4160 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> 4161 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16> 4162 // CHECK: [[VLD4_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i8(<8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i64 7, i8* [[TMP3]]) 4163 // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* 4164 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP16]] 4165 // CHECK: [[TMP17:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8* 4166 // CHECK: [[TMP18:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* 4167 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) 4168 // CHECK: [[TMP19:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16 4169 // CHECK: ret %struct.uint16x8x4_t [[TMP19]] 4170 uint16x8x4_t test_vld4q_lane_u16(uint16_t *a, uint16x8x4_t b) { 4171 return vld4q_lane_u16(a, b, 7); 4172 } 4173 4174 // CHECK-LABEL: define %struct.uint32x4x4_t @test_vld4q_lane_u32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 { 4175 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16 4176 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16 4177 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16 4178 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16 4179 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0 4180 // CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16 4181 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8* 4182 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8* 4183 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 4184 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* 4185 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 4186 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 4187 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0 4188 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 4189 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> 4190 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 4191 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1 4192 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 4193 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> 4194 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 4195 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2 4196 // CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 4197 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8> 4198 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 4199 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3 4200 // CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16 4201 // CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8> 4202 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> 4203 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> 4204 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32> 4205 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32> 4206 // CHECK: [[VLD4_LANE:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i8(<4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i64 3, i8* [[TMP3]]) 4207 // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* 4208 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4_LANE]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP16]] 4209 // CHECK: [[TMP17:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8* 4210 // CHECK: [[TMP18:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* 4211 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) 4212 // CHECK: [[TMP19:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16 4213 // CHECK: ret %struct.uint32x4x4_t [[TMP19]] 4214 uint32x4x4_t test_vld4q_lane_u32(uint32_t *a, uint32x4x4_t b) { 4215 return vld4q_lane_u32(a, b, 3); 4216 } 4217 4218 // CHECK-LABEL: define %struct.uint64x2x4_t @test_vld4q_lane_u64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 { 4219 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16 4220 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16 4221 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16 4222 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16 4223 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[B]], i32 0, i32 0 4224 // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 4225 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8* 4226 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8* 4227 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 4228 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8* 4229 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* 4230 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0 4231 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0 4232 // CHECK: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 4233 // CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8> 4234 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0 4235 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1 4236 // CHECK: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 4237 // CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8> 4238 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0 4239 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2 4240 // CHECK: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 4241 // CHECK: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP8]] to <16 x i8> 4242 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0 4243 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3 4244 // CHECK: [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16 4245 // CHECK: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP10]] to <16 x i8> 4246 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64> 4247 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64> 4248 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x i64> 4249 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <2 x i64> 4250 // CHECK: [[VLD4_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i8(<2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], <2 x i64> [[TMP15]], i64 1, i8* [[TMP3]]) 4251 // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* 4252 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP16]] 4253 // CHECK: [[TMP17:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8* 4254 // CHECK: [[TMP18:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8* 4255 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) 4256 // CHECK: [[TMP19:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16 4257 // CHECK: ret %struct.uint64x2x4_t [[TMP19]] 4258 uint64x2x4_t test_vld4q_lane_u64(uint64_t *a, uint64x2x4_t b) { 4259 return vld4q_lane_u64(a, b, 1); 4260 } 4261 4262 // CHECK-LABEL: define %struct.int8x16x4_t @test_vld4q_lane_s8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 { 4263 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16 4264 // CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16 4265 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16 4266 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16 4267 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0 4268 // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 4269 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8* 4270 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8* 4271 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 4272 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* 4273 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 4274 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 4275 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 4276 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 4277 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1 4278 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 4279 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 4280 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2 4281 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 4282 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 4283 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3 4284 // CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16 4285 // CHECK: [[VLD4_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i64 15, i8* %a) 4286 // CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP2]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* 4287 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP7]] 4288 // CHECK: [[TMP8:%.*]] = bitcast %struct.int8x16x4_t* [[RETVAL]] to i8* 4289 // CHECK: [[TMP9:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* 4290 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 64, i32 16, i1 false) 4291 // CHECK: [[TMP10:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16 4292 // CHECK: ret %struct.int8x16x4_t [[TMP10]] 4293 int8x16x4_t test_vld4q_lane_s8(int8_t *a, int8x16x4_t b) { 4294 return vld4q_lane_s8(a, b, 15); 4295 } 4296 4297 // CHECK-LABEL: define %struct.int16x8x4_t @test_vld4q_lane_s16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 { 4298 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16 4299 // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16 4300 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16 4301 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16 4302 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0 4303 // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 4304 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8* 4305 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8* 4306 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 4307 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* 4308 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 4309 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 4310 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0 4311 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 4312 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 4313 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 4314 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1 4315 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 4316 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 4317 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 4318 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2 4319 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 4320 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> 4321 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 4322 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3 4323 // CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 4324 // CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8> 4325 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 4326 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 4327 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> 4328 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16> 4329 // CHECK: [[VLD4_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i8(<8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i64 7, i8* [[TMP3]]) 4330 // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* 4331 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP16]] 4332 // CHECK: [[TMP17:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8* 4333 // CHECK: [[TMP18:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* 4334 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) 4335 // CHECK: [[TMP19:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16 4336 // CHECK: ret %struct.int16x8x4_t [[TMP19]] 4337 int16x8x4_t test_vld4q_lane_s16(int16_t *a, int16x8x4_t b) { 4338 return vld4q_lane_s16(a, b, 7); 4339 } 4340 4341 // CHECK-LABEL: define %struct.int32x4x4_t @test_vld4q_lane_s32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 { 4342 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16 4343 // CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16 4344 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16 4345 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16 4346 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0 4347 // CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16 4348 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8* 4349 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8* 4350 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 4351 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* 4352 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 4353 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 4354 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0 4355 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 4356 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> 4357 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 4358 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1 4359 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 4360 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> 4361 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 4362 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2 4363 // CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 4364 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8> 4365 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 4366 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3 4367 // CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16 4368 // CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8> 4369 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> 4370 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> 4371 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32> 4372 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32> 4373 // CHECK: [[VLD4_LANE:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i8(<4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i64 3, i8* [[TMP3]]) 4374 // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* 4375 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4_LANE]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP16]] 4376 // CHECK: [[TMP17:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8* 4377 // CHECK: [[TMP18:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* 4378 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) 4379 // CHECK: [[TMP19:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16 4380 // CHECK: ret %struct.int32x4x4_t [[TMP19]] 4381 int32x4x4_t test_vld4q_lane_s32(int32_t *a, int32x4x4_t b) { 4382 return vld4q_lane_s32(a, b, 3); 4383 } 4384 4385 // CHECK-LABEL: define %struct.int64x2x4_t @test_vld4q_lane_s64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 { 4386 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16 4387 // CHECK: [[B:%.*]] = alloca %struct.int64x2x4_t, align 16 4388 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16 4389 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16 4390 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[B]], i32 0, i32 0 4391 // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 4392 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8* 4393 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8* 4394 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 4395 // CHECK: [[TMP2:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8* 4396 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* 4397 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0 4398 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0 4399 // CHECK: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 4400 // CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8> 4401 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0 4402 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1 4403 // CHECK: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 4404 // CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8> 4405 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0 4406 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2 4407 // CHECK: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 4408 // CHECK: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP8]] to <16 x i8> 4409 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0 4410 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3 4411 // CHECK: [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16 4412 // CHECK: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP10]] to <16 x i8> 4413 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64> 4414 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64> 4415 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x i64> 4416 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <2 x i64> 4417 // CHECK: [[VLD4_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i8(<2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], <2 x i64> [[TMP15]], i64 1, i8* [[TMP3]]) 4418 // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* 4419 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP16]] 4420 // CHECK: [[TMP17:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8* 4421 // CHECK: [[TMP18:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8* 4422 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) 4423 // CHECK: [[TMP19:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16 4424 // CHECK: ret %struct.int64x2x4_t [[TMP19]] 4425 int64x2x4_t test_vld4q_lane_s64(int64_t *a, int64x2x4_t b) { 4426 return vld4q_lane_s64(a, b, 1); 4427 } 4428 4429 // CHECK-LABEL: define %struct.float16x8x4_t @test_vld4q_lane_f16(half* %a, [4 x <8 x half>] %b.coerce) #0 { 4430 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16 4431 // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16 4432 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16 4433 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16 4434 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0 4435 // CHECK: store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16 4436 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8* 4437 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8* 4438 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 4439 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* 4440 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* 4441 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 4442 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i64 0, i64 0 4443 // CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 4444 // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> 4445 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 4446 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i64 0, i64 1 4447 // CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 4448 // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8> 4449 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 4450 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i64 0, i64 2 4451 // CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16 4452 // CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8> 4453 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 4454 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3 4455 // CHECK: [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16 4456 // CHECK: [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8> 4457 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 4458 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 4459 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> 4460 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16> 4461 // CHECK: [[VLD4_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i8(<8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i64 7, i8* [[TMP3]]) 4462 // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* 4463 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP16]] 4464 // CHECK: [[TMP17:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8* 4465 // CHECK: [[TMP18:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* 4466 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) 4467 // CHECK: [[TMP19:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16 4468 // CHECK: ret %struct.float16x8x4_t [[TMP19]] 4469 float16x8x4_t test_vld4q_lane_f16(float16_t *a, float16x8x4_t b) { 4470 return vld4q_lane_f16(a, b, 7); 4471 } 4472 4473 // CHECK-LABEL: define %struct.float32x4x4_t @test_vld4q_lane_f32(float* %a, [4 x <4 x float>] %b.coerce) #0 { 4474 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16 4475 // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16 4476 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16 4477 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16 4478 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0 4479 // CHECK: store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16 4480 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8* 4481 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8* 4482 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 4483 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* 4484 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* 4485 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 4486 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i64 0, i64 0 4487 // CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 4488 // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8> 4489 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 4490 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i64 0, i64 1 4491 // CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 4492 // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8> 4493 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 4494 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i64 0, i64 2 4495 // CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16 4496 // CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8> 4497 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 4498 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i64 0, i64 3 4499 // CHECK: [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16 4500 // CHECK: [[TMP11:%.*]] = bitcast <4 x float> [[TMP10]] to <16 x i8> 4501 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float> 4502 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float> 4503 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float> 4504 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x float> 4505 // CHECK: [[VLD4_LANE:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0i8(<4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x float> [[TMP15]], i64 3, i8* [[TMP3]]) 4506 // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* 4507 // CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4_LANE]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP16]] 4508 // CHECK: [[TMP17:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8* 4509 // CHECK: [[TMP18:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* 4510 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) 4511 // CHECK: [[TMP19:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16 4512 // CHECK: ret %struct.float32x4x4_t [[TMP19]] 4513 float32x4x4_t test_vld4q_lane_f32(float32_t *a, float32x4x4_t b) { 4514 return vld4q_lane_f32(a, b, 3); 4515 } 4516 4517 // CHECK-LABEL: define %struct.float64x2x4_t @test_vld4q_lane_f64(double* %a, [4 x <2 x double>] %b.coerce) #0 { 4518 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16 4519 // CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16 4520 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16 4521 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16 4522 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0 4523 // CHECK: store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16 4524 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8* 4525 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8* 4526 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 4527 // CHECK: [[TMP2:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8* 4528 // CHECK: [[TMP3:%.*]] = bitcast double* %a to i8* 4529 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0 4530 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0 4531 // CHECK: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16 4532 // CHECK: [[TMP5:%.*]] = bitcast <2 x double> [[TMP4]] to <16 x i8> 4533 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0 4534 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL1]], i64 0, i64 1 4535 // CHECK: [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16 4536 // CHECK: [[TMP7:%.*]] = bitcast <2 x double> [[TMP6]] to <16 x i8> 4537 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0 4538 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL3]], i64 0, i64 2 4539 // CHECK: [[TMP8:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16 4540 // CHECK: [[TMP9:%.*]] = bitcast <2 x double> [[TMP8]] to <16 x i8> 4541 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0 4542 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL5]], i64 0, i64 3 4543 // CHECK: [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX6]], align 16 4544 // CHECK: [[TMP11:%.*]] = bitcast <2 x double> [[TMP10]] to <16 x i8> 4545 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double> 4546 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x double> 4547 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x double> 4548 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <2 x double> 4549 // CHECK: [[VLD4_LANE:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0i8(<2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], <2 x double> [[TMP15]], i64 1, i8* [[TMP3]]) 4550 // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* 4551 // CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4_LANE]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP16]] 4552 // CHECK: [[TMP17:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8* 4553 // CHECK: [[TMP18:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8* 4554 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) 4555 // CHECK: [[TMP19:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16 4556 // CHECK: ret %struct.float64x2x4_t [[TMP19]] 4557 float64x2x4_t test_vld4q_lane_f64(float64_t *a, float64x2x4_t b) { 4558 return vld4q_lane_f64(a, b, 1); 4559 } 4560 4561 // CHECK-LABEL: define %struct.poly8x16x4_t @test_vld4q_lane_p8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 { 4562 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16 4563 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16 4564 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16 4565 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16 4566 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0 4567 // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 4568 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8* 4569 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8* 4570 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 4571 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* 4572 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 4573 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 4574 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 4575 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 4576 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1 4577 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 4578 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 4579 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2 4580 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 4581 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 4582 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3 4583 // CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16 4584 // CHECK: [[VLD4_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i64 15, i8* %a) 4585 // CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP2]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* 4586 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP7]] 4587 // CHECK: [[TMP8:%.*]] = bitcast %struct.poly8x16x4_t* [[RETVAL]] to i8* 4588 // CHECK: [[TMP9:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* 4589 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 64, i32 16, i1 false) 4590 // CHECK: [[TMP10:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16 4591 // CHECK: ret %struct.poly8x16x4_t [[TMP10]] 4592 poly8x16x4_t test_vld4q_lane_p8(poly8_t *a, poly8x16x4_t b) { 4593 return vld4q_lane_p8(a, b, 15); 4594 } 4595 4596 // CHECK-LABEL: define %struct.poly16x8x4_t @test_vld4q_lane_p16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 { 4597 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16 4598 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16 4599 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16 4600 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16 4601 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0 4602 // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 4603 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8* 4604 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8* 4605 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 4606 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* 4607 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 4608 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 4609 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0 4610 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 4611 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 4612 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 4613 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1 4614 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 4615 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 4616 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 4617 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2 4618 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 4619 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> 4620 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 4621 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3 4622 // CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 4623 // CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8> 4624 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 4625 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 4626 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> 4627 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16> 4628 // CHECK: [[VLD4_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i8(<8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i64 7, i8* [[TMP3]]) 4629 // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* 4630 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP16]] 4631 // CHECK: [[TMP17:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8* 4632 // CHECK: [[TMP18:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* 4633 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) 4634 // CHECK: [[TMP19:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16 4635 // CHECK: ret %struct.poly16x8x4_t [[TMP19]] 4636 poly16x8x4_t test_vld4q_lane_p16(poly16_t *a, poly16x8x4_t b) { 4637 return vld4q_lane_p16(a, b, 7); 4638 } 4639 4640 // CHECK-LABEL: define %struct.poly64x2x4_t @test_vld4q_lane_p64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 { 4641 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16 4642 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16 4643 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16 4644 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16 4645 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[B]], i32 0, i32 0 4646 // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 4647 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8* 4648 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x4_t* [[B]] to i8* 4649 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 4650 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8* 4651 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* 4652 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0 4653 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0 4654 // CHECK: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 4655 // CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8> 4656 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0 4657 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1 4658 // CHECK: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 4659 // CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8> 4660 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0 4661 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2 4662 // CHECK: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 4663 // CHECK: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP8]] to <16 x i8> 4664 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0 4665 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3 4666 // CHECK: [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16 4667 // CHECK: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP10]] to <16 x i8> 4668 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64> 4669 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64> 4670 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x i64> 4671 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <2 x i64> 4672 // CHECK: [[VLD4_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i8(<2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], <2 x i64> [[TMP15]], i64 1, i8* [[TMP3]]) 4673 // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* 4674 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP16]] 4675 // CHECK: [[TMP17:%.*]] = bitcast %struct.poly64x2x4_t* [[RETVAL]] to i8* 4676 // CHECK: [[TMP18:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8* 4677 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) 4678 // CHECK: [[TMP19:%.*]] = load %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[RETVAL]], align 16 4679 // CHECK: ret %struct.poly64x2x4_t [[TMP19]] 4680 poly64x2x4_t test_vld4q_lane_p64(poly64_t *a, poly64x2x4_t b) { 4681 return vld4q_lane_p64(a, b, 1); 4682 } 4683 4684 // CHECK-LABEL: define %struct.uint8x8x4_t @test_vld4_lane_u8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 { 4685 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8 4686 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8 4687 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8 4688 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8 4689 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0 4690 // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 4691 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8* 4692 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8* 4693 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 4694 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* 4695 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 4696 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 4697 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 4698 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 4699 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1 4700 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 4701 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 4702 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2 4703 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 4704 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 4705 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3 4706 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 4707 // CHECK: [[VLD4_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i64 7, i8* %a) 4708 // CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP2]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* 4709 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]] 4710 // CHECK: [[TMP8:%.*]] = bitcast %struct.uint8x8x4_t* [[RETVAL]] to i8* 4711 // CHECK: [[TMP9:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* 4712 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 32, i32 8, i1 false) 4713 // CHECK: [[TMP10:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8 4714 // CHECK: ret %struct.uint8x8x4_t [[TMP10]] 4715 uint8x8x4_t test_vld4_lane_u8(uint8_t *a, uint8x8x4_t b) { 4716 return vld4_lane_u8(a, b, 7); 4717 } 4718 4719 // CHECK-LABEL: define %struct.uint16x4x4_t @test_vld4_lane_u16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 { 4720 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8 4721 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8 4722 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8 4723 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8 4724 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0 4725 // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 4726 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8* 4727 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8* 4728 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 4729 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* 4730 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 4731 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 4732 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0 4733 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 4734 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 4735 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 4736 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1 4737 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 4738 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 4739 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 4740 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2 4741 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 4742 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8> 4743 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 4744 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3 4745 // CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 4746 // CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8> 4747 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 4748 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 4749 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 4750 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16> 4751 // CHECK: [[VLD4_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i8(<4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i64 3, i8* [[TMP3]]) 4752 // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* 4753 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP16]] 4754 // CHECK: [[TMP17:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8* 4755 // CHECK: [[TMP18:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* 4756 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) 4757 // CHECK: [[TMP19:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8 4758 // CHECK: ret %struct.uint16x4x4_t [[TMP19]] 4759 uint16x4x4_t test_vld4_lane_u16(uint16_t *a, uint16x4x4_t b) { 4760 return vld4_lane_u16(a, b, 3); 4761 } 4762 4763 // CHECK-LABEL: define %struct.uint32x2x4_t @test_vld4_lane_u32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 { 4764 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8 4765 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8 4766 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8 4767 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8 4768 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0 4769 // CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8 4770 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8* 4771 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8* 4772 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 4773 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* 4774 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 4775 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 4776 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0 4777 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 4778 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8> 4779 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 4780 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1 4781 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 4782 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8> 4783 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 4784 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2 4785 // CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 4786 // CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8> 4787 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 4788 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3 4789 // CHECK: [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8 4790 // CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8> 4791 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32> 4792 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> 4793 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32> 4794 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32> 4795 // CHECK: [[VLD4_LANE:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i8(<2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i64 1, i8* [[TMP3]]) 4796 // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* 4797 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP16]] 4798 // CHECK: [[TMP17:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8* 4799 // CHECK: [[TMP18:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* 4800 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) 4801 // CHECK: [[TMP19:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8 4802 // CHECK: ret %struct.uint32x2x4_t [[TMP19]] 4803 uint32x2x4_t test_vld4_lane_u32(uint32_t *a, uint32x2x4_t b) { 4804 return vld4_lane_u32(a, b, 1); 4805 } 4806 4807 // CHECK-LABEL: define %struct.uint64x1x4_t @test_vld4_lane_u64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 { 4808 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8 4809 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8 4810 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8 4811 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8 4812 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0 4813 // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 4814 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8* 4815 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8* 4816 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 4817 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* 4818 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* 4819 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 4820 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0 4821 // CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 4822 // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> 4823 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 4824 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1 4825 // CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 4826 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> 4827 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 4828 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2 4829 // CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 4830 // CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> 4831 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 4832 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3 4833 // CHECK: [[TMP10:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8 4834 // CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> 4835 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> 4836 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64> 4837 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> 4838 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64> 4839 // CHECK: [[VLD4_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i8(<1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i64 0, i8* [[TMP3]]) 4840 // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* 4841 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP16]] 4842 // CHECK: [[TMP17:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8* 4843 // CHECK: [[TMP18:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* 4844 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) 4845 // CHECK: [[TMP19:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8 4846 // CHECK: ret %struct.uint64x1x4_t [[TMP19]] 4847 uint64x1x4_t test_vld4_lane_u64(uint64_t *a, uint64x1x4_t b) { 4848 return vld4_lane_u64(a, b, 0); 4849 } 4850 4851 // CHECK-LABEL: define %struct.int8x8x4_t @test_vld4_lane_s8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 { 4852 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8 4853 // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8 4854 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8 4855 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8 4856 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0 4857 // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 4858 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8* 4859 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8* 4860 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 4861 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* 4862 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 4863 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 4864 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 4865 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 4866 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1 4867 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 4868 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 4869 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2 4870 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 4871 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 4872 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3 4873 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 4874 // CHECK: [[VLD4_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i64 7, i8* %a) 4875 // CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP2]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* 4876 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]] 4877 // CHECK: [[TMP8:%.*]] = bitcast %struct.int8x8x4_t* [[RETVAL]] to i8* 4878 // CHECK: [[TMP9:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* 4879 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 32, i32 8, i1 false) 4880 // CHECK: [[TMP10:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8 4881 // CHECK: ret %struct.int8x8x4_t [[TMP10]] 4882 int8x8x4_t test_vld4_lane_s8(int8_t *a, int8x8x4_t b) { 4883 return vld4_lane_s8(a, b, 7); 4884 } 4885 4886 // CHECK-LABEL: define %struct.int16x4x4_t @test_vld4_lane_s16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 { 4887 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8 4888 // CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8 4889 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8 4890 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8 4891 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0 4892 // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 4893 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8* 4894 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8* 4895 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 4896 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* 4897 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 4898 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 4899 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0 4900 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 4901 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 4902 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 4903 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1 4904 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 4905 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 4906 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 4907 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2 4908 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 4909 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8> 4910 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 4911 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3 4912 // CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 4913 // CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8> 4914 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 4915 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 4916 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 4917 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16> 4918 // CHECK: [[VLD4_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i8(<4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i64 3, i8* [[TMP3]]) 4919 // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* 4920 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP16]] 4921 // CHECK: [[TMP17:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8* 4922 // CHECK: [[TMP18:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* 4923 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) 4924 // CHECK: [[TMP19:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8 4925 // CHECK: ret %struct.int16x4x4_t [[TMP19]] 4926 int16x4x4_t test_vld4_lane_s16(int16_t *a, int16x4x4_t b) { 4927 return vld4_lane_s16(a, b, 3); 4928 } 4929 4930 // CHECK-LABEL: define %struct.int32x2x4_t @test_vld4_lane_s32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 { 4931 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8 4932 // CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8 4933 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8 4934 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8 4935 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0 4936 // CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8 4937 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8* 4938 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8* 4939 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 4940 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* 4941 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 4942 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 4943 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0 4944 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 4945 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8> 4946 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 4947 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1 4948 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 4949 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8> 4950 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 4951 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2 4952 // CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 4953 // CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8> 4954 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 4955 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3 4956 // CHECK: [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8 4957 // CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8> 4958 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32> 4959 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> 4960 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32> 4961 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32> 4962 // CHECK: [[VLD4_LANE:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i8(<2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i64 1, i8* [[TMP3]]) 4963 // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* 4964 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP16]] 4965 // CHECK: [[TMP17:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8* 4966 // CHECK: [[TMP18:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* 4967 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) 4968 // CHECK: [[TMP19:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8 4969 // CHECK: ret %struct.int32x2x4_t [[TMP19]] 4970 int32x2x4_t test_vld4_lane_s32(int32_t *a, int32x2x4_t b) { 4971 return vld4_lane_s32(a, b, 1); 4972 } 4973 4974 // CHECK-LABEL: define %struct.int64x1x4_t @test_vld4_lane_s64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 { 4975 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8 4976 // CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8 4977 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8 4978 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8 4979 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0 4980 // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 4981 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8* 4982 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8* 4983 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 4984 // CHECK: [[TMP2:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* 4985 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* 4986 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 4987 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0 4988 // CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 4989 // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> 4990 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 4991 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1 4992 // CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 4993 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> 4994 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 4995 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2 4996 // CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 4997 // CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> 4998 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 4999 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3 5000 // CHECK: [[TMP10:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8 5001 // CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> 5002 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> 5003 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64> 5004 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> 5005 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64> 5006 // CHECK: [[VLD4_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i8(<1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i64 0, i8* [[TMP3]]) 5007 // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* 5008 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP16]] 5009 // CHECK: [[TMP17:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8* 5010 // CHECK: [[TMP18:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* 5011 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) 5012 // CHECK: [[TMP19:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8 5013 // CHECK: ret %struct.int64x1x4_t [[TMP19]] 5014 int64x1x4_t test_vld4_lane_s64(int64_t *a, int64x1x4_t b) { 5015 return vld4_lane_s64(a, b, 0); 5016 } 5017 5018 // CHECK-LABEL: define %struct.float16x4x4_t @test_vld4_lane_f16(half* %a, [4 x <4 x half>] %b.coerce) #0 { 5019 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8 5020 // CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8 5021 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8 5022 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8 5023 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0 5024 // CHECK: store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8 5025 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8* 5026 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8* 5027 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 5028 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* 5029 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* 5030 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 5031 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i64 0, i64 0 5032 // CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8 5033 // CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8> 5034 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 5035 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i64 0, i64 1 5036 // CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 5037 // CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8> 5038 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 5039 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i64 0, i64 2 5040 // CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8 5041 // CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8> 5042 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 5043 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i64 0, i64 3 5044 // CHECK: [[TMP10:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8 5045 // CHECK: [[TMP11:%.*]] = bitcast <4 x half> [[TMP10]] to <8 x i8> 5046 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 5047 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 5048 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 5049 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16> 5050 // CHECK: [[VLD4_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i8(<4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i64 3, i8* [[TMP3]]) 5051 // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* 5052 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP16]] 5053 // CHECK: [[TMP17:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8* 5054 // CHECK: [[TMP18:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* 5055 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) 5056 // CHECK: [[TMP19:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8 5057 // CHECK: ret %struct.float16x4x4_t [[TMP19]] 5058 float16x4x4_t test_vld4_lane_f16(float16_t *a, float16x4x4_t b) { 5059 return vld4_lane_f16(a, b, 3); 5060 } 5061 5062 // CHECK-LABEL: define %struct.float32x2x4_t @test_vld4_lane_f32(float* %a, [4 x <2 x float>] %b.coerce) #0 { 5063 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8 5064 // CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8 5065 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8 5066 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8 5067 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0 5068 // CHECK: store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8 5069 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8* 5070 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8* 5071 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 5072 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* 5073 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* 5074 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 5075 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i64 0, i64 0 5076 // CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 5077 // CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8> 5078 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 5079 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i64 0, i64 1 5080 // CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 5081 // CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8> 5082 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 5083 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i64 0, i64 2 5084 // CHECK: [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8 5085 // CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8> 5086 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 5087 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i64 0, i64 3 5088 // CHECK: [[TMP10:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8 5089 // CHECK: [[TMP11:%.*]] = bitcast <2 x float> [[TMP10]] to <8 x i8> 5090 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float> 5091 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float> 5092 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float> 5093 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float> 5094 // CHECK: [[VLD4_LANE:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0i8(<2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], <2 x float> [[TMP15]], i64 1, i8* [[TMP3]]) 5095 // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* 5096 // CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4_LANE]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP16]] 5097 // CHECK: [[TMP17:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8* 5098 // CHECK: [[TMP18:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* 5099 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) 5100 // CHECK: [[TMP19:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8 5101 // CHECK: ret %struct.float32x2x4_t [[TMP19]] 5102 float32x2x4_t test_vld4_lane_f32(float32_t *a, float32x2x4_t b) { 5103 return vld4_lane_f32(a, b, 1); 5104 } 5105 5106 // CHECK-LABEL: define %struct.float64x1x4_t @test_vld4_lane_f64(double* %a, [4 x <1 x double>] %b.coerce) #0 { 5107 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8 5108 // CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8 5109 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8 5110 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8 5111 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0 5112 // CHECK: store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8 5113 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8* 5114 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8* 5115 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 5116 // CHECK: [[TMP2:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8* 5117 // CHECK: [[TMP3:%.*]] = bitcast double* %a to i8* 5118 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 5119 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0 5120 // CHECK: [[TMP4:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8 5121 // CHECK: [[TMP5:%.*]] = bitcast <1 x double> [[TMP4]] to <8 x i8> 5122 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 5123 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL1]], i64 0, i64 1 5124 // CHECK: [[TMP6:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8 5125 // CHECK: [[TMP7:%.*]] = bitcast <1 x double> [[TMP6]] to <8 x i8> 5126 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 5127 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL3]], i64 0, i64 2 5128 // CHECK: [[TMP8:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8 5129 // CHECK: [[TMP9:%.*]] = bitcast <1 x double> [[TMP8]] to <8 x i8> 5130 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 5131 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL5]], i64 0, i64 3 5132 // CHECK: [[TMP10:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX6]], align 8 5133 // CHECK: [[TMP11:%.*]] = bitcast <1 x double> [[TMP10]] to <8 x i8> 5134 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double> 5135 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x double> 5136 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x double> 5137 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x double> 5138 // CHECK: [[VLD4_LANE:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0i8(<1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], <1 x double> [[TMP15]], i64 0, i8* [[TMP3]]) 5139 // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* 5140 // CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4_LANE]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP16]] 5141 // CHECK: [[TMP17:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8* 5142 // CHECK: [[TMP18:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8* 5143 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) 5144 // CHECK: [[TMP19:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8 5145 // CHECK: ret %struct.float64x1x4_t [[TMP19]] 5146 float64x1x4_t test_vld4_lane_f64(float64_t *a, float64x1x4_t b) { 5147 return vld4_lane_f64(a, b, 0); 5148 } 5149 5150 // CHECK-LABEL: define %struct.poly8x8x4_t @test_vld4_lane_p8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 { 5151 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8 5152 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8 5153 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8 5154 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8 5155 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0 5156 // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 5157 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8* 5158 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8* 5159 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 5160 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* 5161 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 5162 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 5163 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 5164 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 5165 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1 5166 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 5167 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 5168 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2 5169 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 5170 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 5171 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3 5172 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 5173 // CHECK: [[VLD4_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i64 7, i8* %a) 5174 // CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP2]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* 5175 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]] 5176 // CHECK: [[TMP8:%.*]] = bitcast %struct.poly8x8x4_t* [[RETVAL]] to i8* 5177 // CHECK: [[TMP9:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* 5178 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 32, i32 8, i1 false) 5179 // CHECK: [[TMP10:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8 5180 // CHECK: ret %struct.poly8x8x4_t [[TMP10]] 5181 poly8x8x4_t test_vld4_lane_p8(poly8_t *a, poly8x8x4_t b) { 5182 return vld4_lane_p8(a, b, 7); 5183 } 5184 5185 // CHECK-LABEL: define %struct.poly16x4x4_t @test_vld4_lane_p16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 { 5186 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8 5187 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8 5188 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8 5189 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8 5190 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0 5191 // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 5192 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8* 5193 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8* 5194 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 5195 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* 5196 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 5197 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 5198 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0 5199 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 5200 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 5201 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 5202 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1 5203 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 5204 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 5205 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 5206 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2 5207 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 5208 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8> 5209 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 5210 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3 5211 // CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 5212 // CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8> 5213 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 5214 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 5215 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 5216 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16> 5217 // CHECK: [[VLD4_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i8(<4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i64 3, i8* [[TMP3]]) 5218 // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* 5219 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP16]] 5220 // CHECK: [[TMP17:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8* 5221 // CHECK: [[TMP18:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* 5222 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) 5223 // CHECK: [[TMP19:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8 5224 // CHECK: ret %struct.poly16x4x4_t [[TMP19]] 5225 poly16x4x4_t test_vld4_lane_p16(poly16_t *a, poly16x4x4_t b) { 5226 return vld4_lane_p16(a, b, 3); 5227 } 5228 5229 // CHECK-LABEL: define %struct.poly64x1x4_t @test_vld4_lane_p64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 { 5230 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8 5231 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8 5232 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8 5233 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8 5234 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[B]], i32 0, i32 0 5235 // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 5236 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8* 5237 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x4_t* [[B]] to i8* 5238 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 5239 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8* 5240 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* 5241 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0 5242 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0 5243 // CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 5244 // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> 5245 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0 5246 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1 5247 // CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 5248 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> 5249 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0 5250 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2 5251 // CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 5252 // CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> 5253 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0 5254 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3 5255 // CHECK: [[TMP10:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8 5256 // CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> 5257 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> 5258 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64> 5259 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> 5260 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64> 5261 // CHECK: [[VLD4_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i8(<1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i64 0, i8* [[TMP3]]) 5262 // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* 5263 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP16]] 5264 // CHECK: [[TMP17:%.*]] = bitcast %struct.poly64x1x4_t* [[RETVAL]] to i8* 5265 // CHECK: [[TMP18:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8* 5266 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false) 5267 // CHECK: [[TMP19:%.*]] = load %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[RETVAL]], align 8 5268 // CHECK: ret %struct.poly64x1x4_t [[TMP19]] 5269 poly64x1x4_t test_vld4_lane_p64(poly64_t *a, poly64x1x4_t b) { 5270 return vld4_lane_p64(a, b, 0); 5271 } 5272 5273 // CHECK-LABEL: define void @test_vst1q_lane_u8(i8* %a, <16 x i8> %b) #0 { 5274 // CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15 5275 // CHECK: store i8 [[TMP0]], i8* %a 5276 // CHECK: ret void 5277 void test_vst1q_lane_u8(uint8_t *a, uint8x16_t b) { 5278 vst1q_lane_u8(a, b, 15); 5279 } 5280 5281 // CHECK-LABEL: define void @test_vst1q_lane_u16(i16* %a, <8 x i16> %b) #0 { 5282 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 5283 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 5284 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 5285 // CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7 5286 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16* 5287 // CHECK: store i16 [[TMP3]], i16* [[TMP4]] 5288 // CHECK: ret void 5289 void test_vst1q_lane_u16(uint16_t *a, uint16x8_t b) { 5290 vst1q_lane_u16(a, b, 7); 5291 } 5292 5293 // CHECK-LABEL: define void @test_vst1q_lane_u32(i32* %a, <4 x i32> %b) #0 { 5294 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* 5295 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 5296 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 5297 // CHECK: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 5298 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32* 5299 // CHECK: store i32 [[TMP3]], i32* [[TMP4]] 5300 // CHECK: ret void 5301 void test_vst1q_lane_u32(uint32_t *a, uint32x4_t b) { 5302 vst1q_lane_u32(a, b, 3); 5303 } 5304 5305 // CHECK-LABEL: define void @test_vst1q_lane_u64(i64* %a, <2 x i64> %b) #0 { 5306 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 5307 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 5308 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 5309 // CHECK: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 5310 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i64* 5311 // CHECK: store i64 [[TMP3]], i64* [[TMP4]] 5312 // CHECK: ret void 5313 void test_vst1q_lane_u64(uint64_t *a, uint64x2_t b) { 5314 vst1q_lane_u64(a, b, 1); 5315 } 5316 5317 // CHECK-LABEL: define void @test_vst1q_lane_s8(i8* %a, <16 x i8> %b) #0 { 5318 // CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15 5319 // CHECK: store i8 [[TMP0]], i8* %a 5320 // CHECK: ret void 5321 void test_vst1q_lane_s8(int8_t *a, int8x16_t b) { 5322 vst1q_lane_s8(a, b, 15); 5323 } 5324 5325 // CHECK-LABEL: define void @test_vst1q_lane_s16(i16* %a, <8 x i16> %b) #0 { 5326 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 5327 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 5328 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 5329 // CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7 5330 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16* 5331 // CHECK: store i16 [[TMP3]], i16* [[TMP4]] 5332 // CHECK: ret void 5333 void test_vst1q_lane_s16(int16_t *a, int16x8_t b) { 5334 vst1q_lane_s16(a, b, 7); 5335 } 5336 5337 // CHECK-LABEL: define void @test_vst1q_lane_s32(i32* %a, <4 x i32> %b) #0 { 5338 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* 5339 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 5340 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 5341 // CHECK: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 5342 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32* 5343 // CHECK: store i32 [[TMP3]], i32* [[TMP4]] 5344 // CHECK: ret void 5345 void test_vst1q_lane_s32(int32_t *a, int32x4_t b) { 5346 vst1q_lane_s32(a, b, 3); 5347 } 5348 5349 // CHECK-LABEL: define void @test_vst1q_lane_s64(i64* %a, <2 x i64> %b) #0 { 5350 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 5351 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 5352 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 5353 // CHECK: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 5354 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i64* 5355 // CHECK: store i64 [[TMP3]], i64* [[TMP4]] 5356 // CHECK: ret void 5357 void test_vst1q_lane_s64(int64_t *a, int64x2_t b) { 5358 vst1q_lane_s64(a, b, 1); 5359 } 5360 5361 // CHECK-LABEL: define void @test_vst1q_lane_f16(half* %a, <8 x half> %b) #0 { 5362 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* 5363 // CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8> 5364 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 5365 // CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7 5366 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16* 5367 // CHECK: store i16 [[TMP3]], i16* [[TMP4]] 5368 // CHECK: ret void 5369 void test_vst1q_lane_f16(float16_t *a, float16x8_t b) { 5370 vst1q_lane_f16(a, b, 7); 5371 } 5372 5373 // CHECK-LABEL: define void @test_vst1q_lane_f32(float* %a, <4 x float> %b) #0 { 5374 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* 5375 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 5376 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 5377 // CHECK: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 3 5378 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to float* 5379 // CHECK: store float [[TMP3]], float* [[TMP4]] 5380 // CHECK: ret void 5381 void test_vst1q_lane_f32(float32_t *a, float32x4_t b) { 5382 vst1q_lane_f32(a, b, 3); 5383 } 5384 5385 // CHECK-LABEL: define void @test_vst1q_lane_f64(double* %a, <2 x double> %b) #0 { 5386 // CHECK: [[TMP0:%.*]] = bitcast double* %a to i8* 5387 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 5388 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 5389 // CHECK: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 1 5390 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to double* 5391 // CHECK: store double [[TMP3]], double* [[TMP4]] 5392 // CHECK: ret void 5393 void test_vst1q_lane_f64(float64_t *a, float64x2_t b) { 5394 vst1q_lane_f64(a, b, 1); 5395 } 5396 5397 // CHECK-LABEL: define void @test_vst1q_lane_p8(i8* %a, <16 x i8> %b) #0 { 5398 // CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15 5399 // CHECK: store i8 [[TMP0]], i8* %a 5400 // CHECK: ret void 5401 void test_vst1q_lane_p8(poly8_t *a, poly8x16_t b) { 5402 vst1q_lane_p8(a, b, 15); 5403 } 5404 5405 // CHECK-LABEL: define void @test_vst1q_lane_p16(i16* %a, <8 x i16> %b) #0 { 5406 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 5407 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 5408 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 5409 // CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7 5410 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16* 5411 // CHECK: store i16 [[TMP3]], i16* [[TMP4]] 5412 // CHECK: ret void 5413 void test_vst1q_lane_p16(poly16_t *a, poly16x8_t b) { 5414 vst1q_lane_p16(a, b, 7); 5415 } 5416 5417 // CHECK-LABEL: define void @test_vst1q_lane_p64(i64* %a, <2 x i64> %b) #0 { 5418 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 5419 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 5420 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 5421 // CHECK: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 5422 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i64* 5423 // CHECK: store i64 [[TMP3]], i64* [[TMP4]] 5424 // CHECK: ret void 5425 void test_vst1q_lane_p64(poly64_t *a, poly64x2_t b) { 5426 vst1q_lane_p64(a, b, 1); 5427 } 5428 5429 // CHECK-LABEL: define void @test_vst1_lane_u8(i8* %a, <8 x i8> %b) #0 { 5430 // CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7 5431 // CHECK: store i8 [[TMP0]], i8* %a 5432 // CHECK: ret void 5433 void test_vst1_lane_u8(uint8_t *a, uint8x8_t b) { 5434 vst1_lane_u8(a, b, 7); 5435 } 5436 5437 // CHECK-LABEL: define void @test_vst1_lane_u16(i16* %a, <4 x i16> %b) #0 { 5438 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 5439 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 5440 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 5441 // CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3 5442 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16* 5443 // CHECK: store i16 [[TMP3]], i16* [[TMP4]] 5444 // CHECK: ret void 5445 void test_vst1_lane_u16(uint16_t *a, uint16x4_t b) { 5446 vst1_lane_u16(a, b, 3); 5447 } 5448 5449 // CHECK-LABEL: define void @test_vst1_lane_u32(i32* %a, <2 x i32> %b) #0 { 5450 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* 5451 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 5452 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 5453 // CHECK: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1 5454 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32* 5455 // CHECK: store i32 [[TMP3]], i32* [[TMP4]] 5456 // CHECK: ret void 5457 void test_vst1_lane_u32(uint32_t *a, uint32x2_t b) { 5458 vst1_lane_u32(a, b, 1); 5459 } 5460 5461 // CHECK-LABEL: define void @test_vst1_lane_u64(i64* %a, <1 x i64> %b) #0 { 5462 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 5463 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 5464 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 5465 // CHECK: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0 5466 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i64* 5467 // CHECK: store i64 [[TMP3]], i64* [[TMP4]] 5468 // CHECK: ret void 5469 void test_vst1_lane_u64(uint64_t *a, uint64x1_t b) { 5470 vst1_lane_u64(a, b, 0); 5471 } 5472 5473 // CHECK-LABEL: define void @test_vst1_lane_s8(i8* %a, <8 x i8> %b) #0 { 5474 // CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7 5475 // CHECK: store i8 [[TMP0]], i8* %a 5476 // CHECK: ret void 5477 void test_vst1_lane_s8(int8_t *a, int8x8_t b) { 5478 vst1_lane_s8(a, b, 7); 5479 } 5480 5481 // CHECK-LABEL: define void @test_vst1_lane_s16(i16* %a, <4 x i16> %b) #0 { 5482 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 5483 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 5484 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 5485 // CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3 5486 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16* 5487 // CHECK: store i16 [[TMP3]], i16* [[TMP4]] 5488 // CHECK: ret void 5489 void test_vst1_lane_s16(int16_t *a, int16x4_t b) { 5490 vst1_lane_s16(a, b, 3); 5491 } 5492 5493 // CHECK-LABEL: define void @test_vst1_lane_s32(i32* %a, <2 x i32> %b) #0 { 5494 // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* 5495 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 5496 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 5497 // CHECK: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1 5498 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32* 5499 // CHECK: store i32 [[TMP3]], i32* [[TMP4]] 5500 // CHECK: ret void 5501 void test_vst1_lane_s32(int32_t *a, int32x2_t b) { 5502 vst1_lane_s32(a, b, 1); 5503 } 5504 5505 // CHECK-LABEL: define void @test_vst1_lane_s64(i64* %a, <1 x i64> %b) #0 { 5506 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 5507 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 5508 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 5509 // CHECK: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0 5510 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i64* 5511 // CHECK: store i64 [[TMP3]], i64* [[TMP4]] 5512 // CHECK: ret void 5513 void test_vst1_lane_s64(int64_t *a, int64x1_t b) { 5514 vst1_lane_s64(a, b, 0); 5515 } 5516 5517 // CHECK-LABEL: define void @test_vst1_lane_f16(half* %a, <4 x half> %b) #0 { 5518 // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* 5519 // CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8> 5520 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 5521 // CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3 5522 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16* 5523 // CHECK: store i16 [[TMP3]], i16* [[TMP4]] 5524 // CHECK: ret void 5525 void test_vst1_lane_f16(float16_t *a, float16x4_t b) { 5526 vst1_lane_f16(a, b, 3); 5527 } 5528 5529 // CHECK-LABEL: define void @test_vst1_lane_f32(float* %a, <2 x float> %b) #0 { 5530 // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* 5531 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 5532 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 5533 // CHECK: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 5534 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to float* 5535 // CHECK: store float [[TMP3]], float* [[TMP4]] 5536 // CHECK: ret void 5537 void test_vst1_lane_f32(float32_t *a, float32x2_t b) { 5538 vst1_lane_f32(a, b, 1); 5539 } 5540 5541 // CHECK-LABEL: define void @test_vst1_lane_f64(double* %a, <1 x double> %b) #0 { 5542 // CHECK: [[TMP0:%.*]] = bitcast double* %a to i8* 5543 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> 5544 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> 5545 // CHECK: [[TMP3:%.*]] = extractelement <1 x double> [[TMP2]], i32 0 5546 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to double* 5547 // CHECK: store double [[TMP3]], double* [[TMP4]] 5548 // CHECK: ret void 5549 void test_vst1_lane_f64(float64_t *a, float64x1_t b) { 5550 vst1_lane_f64(a, b, 0); 5551 } 5552 5553 // CHECK-LABEL: define void @test_vst1_lane_p8(i8* %a, <8 x i8> %b) #0 { 5554 // CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7 5555 // CHECK: store i8 [[TMP0]], i8* %a 5556 // CHECK: ret void 5557 void test_vst1_lane_p8(poly8_t *a, poly8x8_t b) { 5558 vst1_lane_p8(a, b, 7); 5559 } 5560 5561 // CHECK-LABEL: define void @test_vst1_lane_p16(i16* %a, <4 x i16> %b) #0 { 5562 // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* 5563 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 5564 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 5565 // CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3 5566 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16* 5567 // CHECK: store i16 [[TMP3]], i16* [[TMP4]] 5568 // CHECK: ret void 5569 void test_vst1_lane_p16(poly16_t *a, poly16x4_t b) { 5570 vst1_lane_p16(a, b, 3); 5571 } 5572 5573 // CHECK-LABEL: define void @test_vst1_lane_p64(i64* %a, <1 x i64> %b) #0 { 5574 // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* 5575 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 5576 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 5577 // CHECK: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0 5578 // CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i64* 5579 // CHECK: store i64 [[TMP3]], i64* [[TMP4]] 5580 // CHECK: ret void 5581 void test_vst1_lane_p64(poly64_t *a, poly64x1_t b) { 5582 vst1_lane_p64(a, b, 0); 5583 } 5584 5585 // CHECK-LABEL: define void @test_vst2q_lane_u8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 { 5586 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16 5587 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16 5588 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0 5589 // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 5590 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8* 5591 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8* 5592 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 5593 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0 5594 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 5595 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 5596 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0 5597 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1 5598 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 5599 // CHECK: call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i64 15, i8* %a) 5600 // CHECK: ret void 5601 void test_vst2q_lane_u8(uint8_t *a, uint8x16x2_t b) { 5602 vst2q_lane_u8(a, b, 15); 5603 } 5604 5605 // CHECK-LABEL: define void @test_vst2q_lane_u16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 { 5606 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16 5607 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16 5608 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0 5609 // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 5610 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8* 5611 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8* 5612 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 5613 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 5614 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 5615 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0 5616 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 5617 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 5618 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 5619 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1 5620 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 5621 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 5622 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 5623 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 5624 // CHECK: call void @llvm.aarch64.neon.st2lane.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i64 7, i8* [[TMP2]]) 5625 // CHECK: ret void 5626 void test_vst2q_lane_u16(uint16_t *a, uint16x8x2_t b) { 5627 vst2q_lane_u16(a, b, 7); 5628 } 5629 5630 // CHECK-LABEL: define void @test_vst2q_lane_u32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 { 5631 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16 5632 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16 5633 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0 5634 // CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16 5635 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8* 5636 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8* 5637 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 5638 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 5639 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 5640 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0 5641 // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 5642 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> 5643 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 5644 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1 5645 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 5646 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 5647 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> 5648 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 5649 // CHECK: call void @llvm.aarch64.neon.st2lane.v4i32.p0i8(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i64 3, i8* [[TMP2]]) 5650 // CHECK: ret void 5651 void test_vst2q_lane_u32(uint32_t *a, uint32x4x2_t b) { 5652 vst2q_lane_u32(a, b, 3); 5653 } 5654 5655 // CHECK-LABEL: define void @test_vst2q_lane_u64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 { 5656 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16 5657 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16 5658 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[B]], i32 0, i32 0 5659 // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 5660 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8* 5661 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8* 5662 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 5663 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 5664 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0 5665 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0 5666 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 5667 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 5668 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0 5669 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1 5670 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 5671 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 5672 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 5673 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 5674 // CHECK: call void @llvm.aarch64.neon.st2lane.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64 1, i8* [[TMP2]]) 5675 // CHECK: ret void 5676 void test_vst2q_lane_u64(uint64_t *a, uint64x2x2_t b) { 5677 vst2q_lane_u64(a, b, 1); 5678 } 5679 5680 // CHECK-LABEL: define void @test_vst2q_lane_s8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 { 5681 // CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16 5682 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16 5683 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0 5684 // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 5685 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8* 5686 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8* 5687 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 5688 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0 5689 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 5690 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 5691 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0 5692 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1 5693 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 5694 // CHECK: call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i64 15, i8* %a) 5695 // CHECK: ret void 5696 void test_vst2q_lane_s8(int8_t *a, int8x16x2_t b) { 5697 vst2q_lane_s8(a, b, 15); 5698 } 5699 5700 // CHECK-LABEL: define void @test_vst2q_lane_s16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 { 5701 // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16 5702 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16 5703 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0 5704 // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 5705 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8* 5706 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8* 5707 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 5708 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 5709 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 5710 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0 5711 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 5712 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 5713 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 5714 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1 5715 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 5716 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 5717 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 5718 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 5719 // CHECK: call void @llvm.aarch64.neon.st2lane.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i64 7, i8* [[TMP2]]) 5720 // CHECK: ret void 5721 void test_vst2q_lane_s16(int16_t *a, int16x8x2_t b) { 5722 vst2q_lane_s16(a, b, 7); 5723 } 5724 5725 // CHECK-LABEL: define void @test_vst2q_lane_s32(i32* %a, [2 x <4 x i32>] %b.coerce) #0 { 5726 // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16 5727 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16 5728 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0 5729 // CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16 5730 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8* 5731 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8* 5732 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 5733 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 5734 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 5735 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i64 0, i64 0 5736 // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 5737 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> 5738 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 5739 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1 5740 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 5741 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 5742 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> 5743 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 5744 // CHECK: call void @llvm.aarch64.neon.st2lane.v4i32.p0i8(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i64 3, i8* [[TMP2]]) 5745 // CHECK: ret void 5746 void test_vst2q_lane_s32(int32_t *a, int32x4x2_t b) { 5747 vst2q_lane_s32(a, b, 3); 5748 } 5749 5750 // CHECK-LABEL: define void @test_vst2q_lane_s64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 { 5751 // CHECK: [[B:%.*]] = alloca %struct.int64x2x2_t, align 16 5752 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16 5753 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[B]], i32 0, i32 0 5754 // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 5755 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8* 5756 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8* 5757 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 5758 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 5759 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0 5760 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0 5761 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 5762 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 5763 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0 5764 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1 5765 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 5766 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 5767 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 5768 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 5769 // CHECK: call void @llvm.aarch64.neon.st2lane.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64 1, i8* [[TMP2]]) 5770 // CHECK: ret void 5771 void test_vst2q_lane_s64(int64_t *a, int64x2x2_t b) { 5772 vst2q_lane_s64(a, b, 1); 5773 } 5774 5775 // CHECK-LABEL: define void @test_vst2q_lane_f16(half* %a, [2 x <8 x half>] %b.coerce) #0 { 5776 // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16 5777 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16 5778 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0 5779 // CHECK: store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16 5780 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8* 5781 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8* 5782 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 5783 // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* 5784 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 5785 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i64 0, i64 0 5786 // CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 5787 // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8> 5788 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 5789 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1 5790 // CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 5791 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> 5792 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 5793 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 5794 // CHECK: call void @llvm.aarch64.neon.st2lane.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i64 7, i8* [[TMP2]]) 5795 // CHECK: ret void 5796 void test_vst2q_lane_f16(float16_t *a, float16x8x2_t b) { 5797 vst2q_lane_f16(a, b, 7); 5798 } 5799 5800 // CHECK-LABEL: define void @test_vst2q_lane_f32(float* %a, [2 x <4 x float>] %b.coerce) #0 { 5801 // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16 5802 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16 5803 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0 5804 // CHECK: store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16 5805 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8* 5806 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8* 5807 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 5808 // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* 5809 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 5810 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i64 0, i64 0 5811 // CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 5812 // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8> 5813 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 5814 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i64 0, i64 1 5815 // CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 5816 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> 5817 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float> 5818 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> 5819 // CHECK: call void @llvm.aarch64.neon.st2lane.v4f32.p0i8(<4 x float> [[TMP7]], <4 x float> [[TMP8]], i64 3, i8* [[TMP2]]) 5820 // CHECK: ret void 5821 void test_vst2q_lane_f32(float32_t *a, float32x4x2_t b) { 5822 vst2q_lane_f32(a, b, 3); 5823 } 5824 5825 // CHECK-LABEL: define void @test_vst2q_lane_f64(double* %a, [2 x <2 x double>] %b.coerce) #0 { 5826 // CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16 5827 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16 5828 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0 5829 // CHECK: store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16 5830 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8* 5831 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x2_t* [[B]] to i8* 5832 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 5833 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* 5834 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0 5835 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]], i64 0, i64 0 5836 // CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16 5837 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8> 5838 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0 5839 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL1]], i64 0, i64 1 5840 // CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16 5841 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8> 5842 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double> 5843 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double> 5844 // CHECK: call void @llvm.aarch64.neon.st2lane.v2f64.p0i8(<2 x double> [[TMP7]], <2 x double> [[TMP8]], i64 1, i8* [[TMP2]]) 5845 // CHECK: ret void 5846 void test_vst2q_lane_f64(float64_t *a, float64x2x2_t b) { 5847 vst2q_lane_f64(a, b, 1); 5848 } 5849 5850 // CHECK-LABEL: define void @test_vst2q_lane_p8(i8* %a, [2 x <16 x i8>] %b.coerce) #0 { 5851 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16 5852 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16 5853 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0 5854 // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 5855 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8* 5856 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8* 5857 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 5858 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0 5859 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 5860 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 5861 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0 5862 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1 5863 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 5864 // CHECK: call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i64 15, i8* %a) 5865 // CHECK: ret void 5866 void test_vst2q_lane_p8(poly8_t *a, poly8x16x2_t b) { 5867 vst2q_lane_p8(a, b, 15); 5868 } 5869 5870 // CHECK-LABEL: define void @test_vst2q_lane_p16(i16* %a, [2 x <8 x i16>] %b.coerce) #0 { 5871 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16 5872 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16 5873 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0 5874 // CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 5875 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8* 5876 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8* 5877 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 5878 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 5879 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 5880 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i64 0, i64 0 5881 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 5882 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 5883 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 5884 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1 5885 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 5886 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 5887 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 5888 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 5889 // CHECK: call void @llvm.aarch64.neon.st2lane.v8i16.p0i8(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i64 7, i8* [[TMP2]]) 5890 // CHECK: ret void 5891 void test_vst2q_lane_p16(poly16_t *a, poly16x8x2_t b) { 5892 vst2q_lane_p16(a, b, 7); 5893 } 5894 5895 // CHECK-LABEL: define void @test_vst2q_lane_p64(i64* %a, [2 x <2 x i64>] %b.coerce) #0 { 5896 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16 5897 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16 5898 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[B]], i32 0, i32 0 5899 // CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 5900 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8* 5901 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x2_t* [[B]] to i8* 5902 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false) 5903 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 5904 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0 5905 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], i64 0, i64 0 5906 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 5907 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 5908 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0 5909 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1 5910 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 5911 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 5912 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 5913 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 5914 // CHECK: call void @llvm.aarch64.neon.st2lane.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64 1, i8* [[TMP2]]) 5915 // CHECK: ret void 5916 void test_vst2q_lane_p64(poly64_t *a, poly64x2x2_t b) { 5917 vst2q_lane_p64(a, b, 1); 5918 } 5919 5920 // CHECK-LABEL: define void @test_vst2_lane_u8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { 5921 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8 5922 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8 5923 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0 5924 // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 5925 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8* 5926 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8* 5927 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 5928 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 5929 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 5930 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 5931 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 5932 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1 5933 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 5934 // CHECK: call void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i64 7, i8* %a) 5935 // CHECK: ret void 5936 void test_vst2_lane_u8(uint8_t *a, uint8x8x2_t b) { 5937 vst2_lane_u8(a, b, 7); 5938 } 5939 5940 // CHECK-LABEL: define void @test_vst2_lane_u16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { 5941 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8 5942 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8 5943 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0 5944 // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 5945 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8* 5946 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8* 5947 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 5948 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 5949 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0 5950 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0 5951 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 5952 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 5953 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0 5954 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1 5955 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 5956 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 5957 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 5958 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 5959 // CHECK: call void @llvm.aarch64.neon.st2lane.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i64 3, i8* [[TMP2]]) 5960 // CHECK: ret void 5961 void test_vst2_lane_u16(uint16_t *a, uint16x4x2_t b) { 5962 vst2_lane_u16(a, b, 3); 5963 } 5964 5965 // CHECK-LABEL: define void @test_vst2_lane_u32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 { 5966 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8 5967 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8 5968 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0 5969 // CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8 5970 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8* 5971 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8* 5972 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 5973 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 5974 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0 5975 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0 5976 // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 5977 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> 5978 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0 5979 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1 5980 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 5981 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 5982 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> 5983 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 5984 // CHECK: call void @llvm.aarch64.neon.st2lane.v2i32.p0i8(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i64 1, i8* [[TMP2]]) 5985 // CHECK: ret void 5986 void test_vst2_lane_u32(uint32_t *a, uint32x2x2_t b) { 5987 vst2_lane_u32(a, b, 1); 5988 } 5989 5990 // CHECK-LABEL: define void @test_vst2_lane_u64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 { 5991 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8 5992 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8 5993 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0 5994 // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 5995 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8* 5996 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8* 5997 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 5998 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 5999 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0 6000 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0 6001 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 6002 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 6003 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0 6004 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1 6005 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 6006 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 6007 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 6008 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 6009 // CHECK: call void @llvm.aarch64.neon.st2lane.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64 0, i8* [[TMP2]]) 6010 // CHECK: ret void 6011 void test_vst2_lane_u64(uint64_t *a, uint64x1x2_t b) { 6012 vst2_lane_u64(a, b, 0); 6013 } 6014 6015 // CHECK-LABEL: define void @test_vst2_lane_s8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { 6016 // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8 6017 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8 6018 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0 6019 // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 6020 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8* 6021 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8* 6022 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 6023 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 6024 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 6025 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 6026 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 6027 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1 6028 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 6029 // CHECK: call void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i64 7, i8* %a) 6030 // CHECK: ret void 6031 void test_vst2_lane_s8(int8_t *a, int8x8x2_t b) { 6032 vst2_lane_s8(a, b, 7); 6033 } 6034 6035 // CHECK-LABEL: define void @test_vst2_lane_s16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { 6036 // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8 6037 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8 6038 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0 6039 // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 6040 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8* 6041 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8* 6042 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 6043 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 6044 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0 6045 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0 6046 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 6047 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 6048 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0 6049 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1 6050 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 6051 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 6052 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 6053 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 6054 // CHECK: call void @llvm.aarch64.neon.st2lane.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i64 3, i8* [[TMP2]]) 6055 // CHECK: ret void 6056 void test_vst2_lane_s16(int16_t *a, int16x4x2_t b) { 6057 vst2_lane_s16(a, b, 3); 6058 } 6059 6060 // CHECK-LABEL: define void @test_vst2_lane_s32(i32* %a, [2 x <2 x i32>] %b.coerce) #0 { 6061 // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8 6062 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8 6063 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0 6064 // CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8 6065 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8* 6066 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8* 6067 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 6068 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 6069 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0 6070 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i64 0, i64 0 6071 // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 6072 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> 6073 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0 6074 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1 6075 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 6076 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 6077 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> 6078 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 6079 // CHECK: call void @llvm.aarch64.neon.st2lane.v2i32.p0i8(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i64 1, i8* [[TMP2]]) 6080 // CHECK: ret void 6081 void test_vst2_lane_s32(int32_t *a, int32x2x2_t b) { 6082 vst2_lane_s32(a, b, 1); 6083 } 6084 6085 // CHECK-LABEL: define void @test_vst2_lane_s64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 { 6086 // CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8 6087 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8 6088 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0 6089 // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 6090 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8* 6091 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8* 6092 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 6093 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 6094 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0 6095 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0 6096 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 6097 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 6098 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0 6099 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1 6100 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 6101 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 6102 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 6103 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 6104 // CHECK: call void @llvm.aarch64.neon.st2lane.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64 0, i8* [[TMP2]]) 6105 // CHECK: ret void 6106 void test_vst2_lane_s64(int64_t *a, int64x1x2_t b) { 6107 vst2_lane_s64(a, b, 0); 6108 } 6109 6110 // CHECK-LABEL: define void @test_vst2_lane_f16(half* %a, [2 x <4 x half>] %b.coerce) #0 { 6111 // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8 6112 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8 6113 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0 6114 // CHECK: store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8 6115 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8* 6116 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8* 6117 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 6118 // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* 6119 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0 6120 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i64 0, i64 0 6121 // CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8 6122 // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8> 6123 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0 6124 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1 6125 // CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 6126 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> 6127 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 6128 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 6129 // CHECK: call void @llvm.aarch64.neon.st2lane.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i64 3, i8* [[TMP2]]) 6130 // CHECK: ret void 6131 void test_vst2_lane_f16(float16_t *a, float16x4x2_t b) { 6132 vst2_lane_f16(a, b, 3); 6133 } 6134 6135 // CHECK-LABEL: define void @test_vst2_lane_f32(float* %a, [2 x <2 x float>] %b.coerce) #0 { 6136 // CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8 6137 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8 6138 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0 6139 // CHECK: store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8 6140 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8* 6141 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8* 6142 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 6143 // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* 6144 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0 6145 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i64 0, i64 0 6146 // CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 6147 // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8> 6148 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0 6149 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i64 0, i64 1 6150 // CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 6151 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8> 6152 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float> 6153 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> 6154 // CHECK: call void @llvm.aarch64.neon.st2lane.v2f32.p0i8(<2 x float> [[TMP7]], <2 x float> [[TMP8]], i64 1, i8* [[TMP2]]) 6155 // CHECK: ret void 6156 void test_vst2_lane_f32(float32_t *a, float32x2x2_t b) { 6157 vst2_lane_f32(a, b, 1); 6158 } 6159 6160 // CHECK-LABEL: define void @test_vst2_lane_f64(double* %a, [2 x <1 x double>] %b.coerce) #0 { 6161 // CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8 6162 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8 6163 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0 6164 // CHECK: store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8 6165 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8* 6166 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x2_t* [[B]] to i8* 6167 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 6168 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* 6169 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0 6170 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]], i64 0, i64 0 6171 // CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8 6172 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> 6173 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0 6174 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL1]], i64 0, i64 1 6175 // CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8 6176 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> 6177 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> 6178 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> 6179 // CHECK: call void @llvm.aarch64.neon.st2lane.v1f64.p0i8(<1 x double> [[TMP7]], <1 x double> [[TMP8]], i64 0, i8* [[TMP2]]) 6180 // CHECK: ret void 6181 void test_vst2_lane_f64(float64_t *a, float64x1x2_t b) { 6182 vst2_lane_f64(a, b, 0); 6183 } 6184 6185 // CHECK-LABEL: define void @test_vst2_lane_p8(i8* %a, [2 x <8 x i8>] %b.coerce) #0 { 6186 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8 6187 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8 6188 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0 6189 // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 6190 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8* 6191 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8* 6192 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 6193 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0 6194 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 6195 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 6196 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0 6197 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1 6198 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 6199 // CHECK: call void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i64 7, i8* %a) 6200 // CHECK: ret void 6201 void test_vst2_lane_p8(poly8_t *a, poly8x8x2_t b) { 6202 vst2_lane_p8(a, b, 7); 6203 } 6204 6205 // CHECK-LABEL: define void @test_vst2_lane_p16(i16* %a, [2 x <4 x i16>] %b.coerce) #0 { 6206 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8 6207 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8 6208 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0 6209 // CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 6210 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8* 6211 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8* 6212 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 6213 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 6214 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0 6215 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i64 0, i64 0 6216 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 6217 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 6218 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0 6219 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1 6220 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 6221 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 6222 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 6223 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 6224 // CHECK: call void @llvm.aarch64.neon.st2lane.v4i16.p0i8(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i64 3, i8* [[TMP2]]) 6225 // CHECK: ret void 6226 void test_vst2_lane_p16(poly16_t *a, poly16x4x2_t b) { 6227 vst2_lane_p16(a, b, 3); 6228 } 6229 6230 // CHECK-LABEL: define void @test_vst2_lane_p64(i64* %a, [2 x <1 x i64>] %b.coerce) #0 { 6231 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8 6232 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8 6233 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[B]], i32 0, i32 0 6234 // CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 6235 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8* 6236 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x2_t* [[B]] to i8* 6237 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false) 6238 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 6239 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0 6240 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i64 0, i64 0 6241 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 6242 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 6243 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0 6244 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1 6245 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 6246 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 6247 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 6248 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 6249 // CHECK: call void @llvm.aarch64.neon.st2lane.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64 0, i8* [[TMP2]]) 6250 // CHECK: ret void 6251 void test_vst2_lane_p64(poly64_t *a, poly64x1x2_t b) { 6252 vst2_lane_p64(a, b, 0); 6253 } 6254 6255 // CHECK-LABEL: define void @test_vst3q_lane_u8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 { 6256 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16 6257 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16 6258 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0 6259 // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 6260 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8* 6261 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8* 6262 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 6263 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 6264 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 6265 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 6266 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 6267 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1 6268 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 6269 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 6270 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2 6271 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 6272 // CHECK: call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i64 15, i8* %a) 6273 // CHECK: ret void 6274 void test_vst3q_lane_u8(uint8_t *a, uint8x16x3_t b) { 6275 vst3q_lane_u8(a, b, 15); 6276 } 6277 6278 // CHECK-LABEL: define void @test_vst3q_lane_u16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 { 6279 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16 6280 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16 6281 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0 6282 // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 6283 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8* 6284 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8* 6285 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 6286 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 6287 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 6288 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0 6289 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 6290 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 6291 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 6292 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1 6293 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 6294 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 6295 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 6296 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2 6297 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 6298 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 6299 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 6300 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 6301 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 6302 // CHECK: call void @llvm.aarch64.neon.st3lane.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i64 7, i8* [[TMP2]]) 6303 // CHECK: ret void 6304 void test_vst3q_lane_u16(uint16_t *a, uint16x8x3_t b) { 6305 vst3q_lane_u16(a, b, 7); 6306 } 6307 6308 // CHECK-LABEL: define void @test_vst3q_lane_u32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 { 6309 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16 6310 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16 6311 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0 6312 // CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16 6313 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8* 6314 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8* 6315 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 6316 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 6317 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 6318 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0 6319 // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 6320 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> 6321 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 6322 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1 6323 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 6324 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 6325 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 6326 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2 6327 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 6328 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> 6329 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> 6330 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 6331 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> 6332 // CHECK: call void @llvm.aarch64.neon.st3lane.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i64 3, i8* [[TMP2]]) 6333 // CHECK: ret void 6334 void test_vst3q_lane_u32(uint32_t *a, uint32x4x3_t b) { 6335 vst3q_lane_u32(a, b, 3); 6336 } 6337 6338 // CHECK-LABEL: define void @test_vst3q_lane_u64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 { 6339 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16 6340 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16 6341 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[B]], i32 0, i32 0 6342 // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 6343 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8* 6344 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8* 6345 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 6346 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 6347 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0 6348 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0 6349 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 6350 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 6351 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0 6352 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1 6353 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 6354 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 6355 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0 6356 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2 6357 // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 6358 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> 6359 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 6360 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 6361 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> 6362 // CHECK: call void @llvm.aarch64.neon.st3lane.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64 1, i8* [[TMP2]]) 6363 // CHECK: ret void 6364 void test_vst3q_lane_u64(uint64_t *a, uint64x2x3_t b) { 6365 vst3q_lane_u64(a, b, 1); 6366 } 6367 6368 // CHECK-LABEL: define void @test_vst3q_lane_s8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 { 6369 // CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16 6370 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16 6371 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0 6372 // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 6373 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8* 6374 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8* 6375 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 6376 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 6377 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 6378 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 6379 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 6380 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1 6381 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 6382 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 6383 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2 6384 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 6385 // CHECK: call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i64 15, i8* %a) 6386 // CHECK: ret void 6387 void test_vst3q_lane_s8(int8_t *a, int8x16x3_t b) { 6388 vst3q_lane_s8(a, b, 15); 6389 } 6390 6391 // CHECK-LABEL: define void @test_vst3q_lane_s16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 { 6392 // CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16 6393 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16 6394 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0 6395 // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 6396 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8* 6397 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8* 6398 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 6399 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 6400 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 6401 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0 6402 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 6403 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 6404 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 6405 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1 6406 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 6407 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 6408 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 6409 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2 6410 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 6411 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 6412 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 6413 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 6414 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 6415 // CHECK: call void @llvm.aarch64.neon.st3lane.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i64 7, i8* [[TMP2]]) 6416 // CHECK: ret void 6417 void test_vst3q_lane_s16(int16_t *a, int16x8x3_t b) { 6418 vst3q_lane_s16(a, b, 7); 6419 } 6420 6421 // CHECK-LABEL: define void @test_vst3q_lane_s32(i32* %a, [3 x <4 x i32>] %b.coerce) #0 { 6422 // CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16 6423 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16 6424 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0 6425 // CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16 6426 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8* 6427 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8* 6428 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 6429 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 6430 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 6431 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i64 0, i64 0 6432 // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 6433 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> 6434 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 6435 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1 6436 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 6437 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 6438 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 6439 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2 6440 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 6441 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> 6442 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> 6443 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 6444 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> 6445 // CHECK: call void @llvm.aarch64.neon.st3lane.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i64 3, i8* [[TMP2]]) 6446 // CHECK: ret void 6447 void test_vst3q_lane_s32(int32_t *a, int32x4x3_t b) { 6448 vst3q_lane_s32(a, b, 3); 6449 } 6450 6451 // CHECK-LABEL: define void @test_vst3q_lane_s64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 { 6452 // CHECK: [[B:%.*]] = alloca %struct.int64x2x3_t, align 16 6453 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16 6454 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[B]], i32 0, i32 0 6455 // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 6456 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8* 6457 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8* 6458 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 6459 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 6460 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0 6461 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0 6462 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 6463 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 6464 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0 6465 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1 6466 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 6467 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 6468 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0 6469 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2 6470 // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 6471 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> 6472 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 6473 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 6474 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> 6475 // CHECK: call void @llvm.aarch64.neon.st3lane.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64 1, i8* [[TMP2]]) 6476 // CHECK: ret void 6477 void test_vst3q_lane_s64(int64_t *a, int64x2x3_t b) { 6478 vst3q_lane_s64(a, b, 1); 6479 } 6480 6481 // CHECK-LABEL: define void @test_vst3q_lane_f16(half* %a, [3 x <8 x half>] %b.coerce) #0 { 6482 // CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16 6483 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16 6484 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0 6485 // CHECK: store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16 6486 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8* 6487 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8* 6488 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 6489 // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* 6490 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 6491 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i64 0, i64 0 6492 // CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 6493 // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8> 6494 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 6495 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i64 0, i64 1 6496 // CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 6497 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> 6498 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 6499 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2 6500 // CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16 6501 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8> 6502 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 6503 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 6504 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 6505 // CHECK: call void @llvm.aarch64.neon.st3lane.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i64 7, i8* [[TMP2]]) 6506 // CHECK: ret void 6507 void test_vst3q_lane_f16(float16_t *a, float16x8x3_t b) { 6508 vst3q_lane_f16(a, b, 7); 6509 } 6510 6511 // CHECK-LABEL: define void @test_vst3q_lane_f32(float* %a, [3 x <4 x float>] %b.coerce) #0 { 6512 // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16 6513 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16 6514 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0 6515 // CHECK: store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16 6516 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8* 6517 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8* 6518 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 6519 // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* 6520 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 6521 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i64 0, i64 0 6522 // CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 6523 // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8> 6524 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 6525 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i64 0, i64 1 6526 // CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 6527 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> 6528 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 6529 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i64 0, i64 2 6530 // CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16 6531 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8> 6532 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float> 6533 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> 6534 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float> 6535 // CHECK: call void @llvm.aarch64.neon.st3lane.v4f32.p0i8(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], i64 3, i8* [[TMP2]]) 6536 // CHECK: ret void 6537 void test_vst3q_lane_f32(float32_t *a, float32x4x3_t b) { 6538 vst3q_lane_f32(a, b, 3); 6539 } 6540 6541 // CHECK-LABEL: define void @test_vst3q_lane_f64(double* %a, [3 x <2 x double>] %b.coerce) #0 { 6542 // CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16 6543 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16 6544 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0 6545 // CHECK: store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16 6546 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8* 6547 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x3_t* [[B]] to i8* 6548 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 6549 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* 6550 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0 6551 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]], i64 0, i64 0 6552 // CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16 6553 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8> 6554 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0 6555 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL1]], i64 0, i64 1 6556 // CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16 6557 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8> 6558 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0 6559 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL3]], i64 0, i64 2 6560 // CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16 6561 // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8> 6562 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double> 6563 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double> 6564 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double> 6565 // CHECK: call void @llvm.aarch64.neon.st3lane.v2f64.p0i8(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], i64 1, i8* [[TMP2]]) 6566 // CHECK: ret void 6567 void test_vst3q_lane_f64(float64_t *a, float64x2x3_t b) { 6568 vst3q_lane_f64(a, b, 1); 6569 } 6570 6571 // CHECK-LABEL: define void @test_vst3q_lane_p8(i8* %a, [3 x <16 x i8>] %b.coerce) #0 { 6572 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16 6573 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16 6574 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0 6575 // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 6576 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8* 6577 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8* 6578 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 6579 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0 6580 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 6581 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 6582 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0 6583 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1 6584 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 6585 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0 6586 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2 6587 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 6588 // CHECK: call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i64 15, i8* %a) 6589 // CHECK: ret void 6590 void test_vst3q_lane_p8(poly8_t *a, poly8x16x3_t b) { 6591 vst3q_lane_p8(a, b, 15); 6592 } 6593 6594 // CHECK-LABEL: define void @test_vst3q_lane_p16(i16* %a, [3 x <8 x i16>] %b.coerce) #0 { 6595 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16 6596 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16 6597 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0 6598 // CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 6599 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8* 6600 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8* 6601 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 6602 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 6603 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 6604 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i64 0, i64 0 6605 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 6606 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 6607 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 6608 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1 6609 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 6610 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 6611 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 6612 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2 6613 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 6614 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 6615 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 6616 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 6617 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 6618 // CHECK: call void @llvm.aarch64.neon.st3lane.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i64 7, i8* [[TMP2]]) 6619 // CHECK: ret void 6620 void test_vst3q_lane_p16(poly16_t *a, poly16x8x3_t b) { 6621 vst3q_lane_p16(a, b, 7); 6622 } 6623 6624 // CHECK-LABEL: define void @test_vst3q_lane_p64(i64* %a, [3 x <2 x i64>] %b.coerce) #0 { 6625 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16 6626 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16 6627 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[B]], i32 0, i32 0 6628 // CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 6629 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8* 6630 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x3_t* [[B]] to i8* 6631 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false) 6632 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 6633 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0 6634 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], i64 0, i64 0 6635 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 6636 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 6637 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0 6638 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1 6639 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 6640 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 6641 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0 6642 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2 6643 // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 6644 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> 6645 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 6646 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 6647 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> 6648 // CHECK: call void @llvm.aarch64.neon.st3lane.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64 1, i8* [[TMP2]]) 6649 // CHECK: ret void 6650 void test_vst3q_lane_p64(poly64_t *a, poly64x2x3_t b) { 6651 vst3q_lane_p64(a, b, 1); 6652 } 6653 6654 // CHECK-LABEL: define void @test_vst3_lane_u8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 { 6655 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8 6656 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8 6657 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0 6658 // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 6659 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8* 6660 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8* 6661 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 6662 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 6663 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 6664 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 6665 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 6666 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1 6667 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 6668 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 6669 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2 6670 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 6671 // CHECK: call void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i64 7, i8* %a) 6672 // CHECK: ret void 6673 void test_vst3_lane_u8(uint8_t *a, uint8x8x3_t b) { 6674 vst3_lane_u8(a, b, 7); 6675 } 6676 6677 // CHECK-LABEL: define void @test_vst3_lane_u16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 { 6678 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8 6679 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8 6680 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0 6681 // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 6682 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8* 6683 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8* 6684 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 6685 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 6686 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 6687 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0 6688 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 6689 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 6690 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 6691 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1 6692 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 6693 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 6694 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 6695 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2 6696 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 6697 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 6698 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 6699 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 6700 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 6701 // CHECK: call void @llvm.aarch64.neon.st3lane.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i64 3, i8* [[TMP2]]) 6702 // CHECK: ret void 6703 void test_vst3_lane_u16(uint16_t *a, uint16x4x3_t b) { 6704 vst3_lane_u16(a, b, 3); 6705 } 6706 6707 // CHECK-LABEL: define void @test_vst3_lane_u32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 { 6708 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8 6709 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8 6710 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0 6711 // CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8 6712 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8* 6713 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8* 6714 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 6715 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 6716 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 6717 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0 6718 // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 6719 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> 6720 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 6721 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1 6722 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 6723 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 6724 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 6725 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2 6726 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 6727 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> 6728 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> 6729 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 6730 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> 6731 // CHECK: call void @llvm.aarch64.neon.st3lane.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i64 1, i8* [[TMP2]]) 6732 // CHECK: ret void 6733 void test_vst3_lane_u32(uint32_t *a, uint32x2x3_t b) { 6734 vst3_lane_u32(a, b, 1); 6735 } 6736 6737 // CHECK-LABEL: define void @test_vst3_lane_u64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 { 6738 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8 6739 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8 6740 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0 6741 // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 6742 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8* 6743 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8* 6744 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 6745 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 6746 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 6747 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0 6748 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 6749 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 6750 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 6751 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1 6752 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 6753 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 6754 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 6755 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2 6756 // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 6757 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> 6758 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 6759 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 6760 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> 6761 // CHECK: call void @llvm.aarch64.neon.st3lane.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64 0, i8* [[TMP2]]) 6762 // CHECK: ret void 6763 void test_vst3_lane_u64(uint64_t *a, uint64x1x3_t b) { 6764 vst3_lane_u64(a, b, 0); 6765 } 6766 6767 // CHECK-LABEL: define void @test_vst3_lane_s8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 { 6768 // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8 6769 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8 6770 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0 6771 // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 6772 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8* 6773 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8* 6774 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 6775 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 6776 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 6777 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 6778 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 6779 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1 6780 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 6781 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 6782 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2 6783 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 6784 // CHECK: call void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i64 7, i8* %a) 6785 // CHECK: ret void 6786 void test_vst3_lane_s8(int8_t *a, int8x8x3_t b) { 6787 vst3_lane_s8(a, b, 7); 6788 } 6789 6790 // CHECK-LABEL: define void @test_vst3_lane_s16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 { 6791 // CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8 6792 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8 6793 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0 6794 // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 6795 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8* 6796 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8* 6797 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 6798 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 6799 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 6800 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0 6801 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 6802 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 6803 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 6804 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1 6805 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 6806 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 6807 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 6808 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2 6809 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 6810 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 6811 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 6812 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 6813 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 6814 // CHECK: call void @llvm.aarch64.neon.st3lane.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i64 3, i8* [[TMP2]]) 6815 // CHECK: ret void 6816 void test_vst3_lane_s16(int16_t *a, int16x4x3_t b) { 6817 vst3_lane_s16(a, b, 3); 6818 } 6819 6820 // CHECK-LABEL: define void @test_vst3_lane_s32(i32* %a, [3 x <2 x i32>] %b.coerce) #0 { 6821 // CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8 6822 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8 6823 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0 6824 // CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8 6825 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8* 6826 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8* 6827 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 6828 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 6829 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 6830 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i64 0, i64 0 6831 // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 6832 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> 6833 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 6834 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1 6835 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 6836 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 6837 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 6838 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2 6839 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 6840 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> 6841 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> 6842 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 6843 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> 6844 // CHECK: call void @llvm.aarch64.neon.st3lane.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i64 1, i8* [[TMP2]]) 6845 // CHECK: ret void 6846 void test_vst3_lane_s32(int32_t *a, int32x2x3_t b) { 6847 vst3_lane_s32(a, b, 1); 6848 } 6849 6850 // CHECK-LABEL: define void @test_vst3_lane_s64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 { 6851 // CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8 6852 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8 6853 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0 6854 // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 6855 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8* 6856 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8* 6857 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 6858 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 6859 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 6860 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0 6861 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 6862 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 6863 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 6864 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1 6865 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 6866 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 6867 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 6868 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2 6869 // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 6870 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> 6871 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 6872 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 6873 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> 6874 // CHECK: call void @llvm.aarch64.neon.st3lane.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64 0, i8* [[TMP2]]) 6875 // CHECK: ret void 6876 void test_vst3_lane_s64(int64_t *a, int64x1x3_t b) { 6877 vst3_lane_s64(a, b, 0); 6878 } 6879 6880 // CHECK-LABEL: define void @test_vst3_lane_f16(half* %a, [3 x <4 x half>] %b.coerce) #0 { 6881 // CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8 6882 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8 6883 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0 6884 // CHECK: store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8 6885 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8* 6886 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8* 6887 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 6888 // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* 6889 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 6890 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i64 0, i64 0 6891 // CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8 6892 // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8> 6893 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 6894 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i64 0, i64 1 6895 // CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 6896 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> 6897 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 6898 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2 6899 // CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8 6900 // CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8> 6901 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 6902 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 6903 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 6904 // CHECK: call void @llvm.aarch64.neon.st3lane.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i64 3, i8* [[TMP2]]) 6905 // CHECK: ret void 6906 void test_vst3_lane_f16(float16_t *a, float16x4x3_t b) { 6907 vst3_lane_f16(a, b, 3); 6908 } 6909 6910 // CHECK-LABEL: define void @test_vst3_lane_f32(float* %a, [3 x <2 x float>] %b.coerce) #0 { 6911 // CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8 6912 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8 6913 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0 6914 // CHECK: store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8 6915 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8* 6916 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8* 6917 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 6918 // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* 6919 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 6920 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i64 0, i64 0 6921 // CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 6922 // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8> 6923 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 6924 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i64 0, i64 1 6925 // CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 6926 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8> 6927 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 6928 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i64 0, i64 2 6929 // CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8 6930 // CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8> 6931 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float> 6932 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> 6933 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float> 6934 // CHECK: call void @llvm.aarch64.neon.st3lane.v2f32.p0i8(<2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], i64 1, i8* [[TMP2]]) 6935 // CHECK: ret void 6936 void test_vst3_lane_f32(float32_t *a, float32x2x3_t b) { 6937 vst3_lane_f32(a, b, 1); 6938 } 6939 6940 // CHECK-LABEL: define void @test_vst3_lane_f64(double* %a, [3 x <1 x double>] %b.coerce) #0 { 6941 // CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8 6942 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8 6943 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0 6944 // CHECK: store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8 6945 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8* 6946 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x3_t* [[B]] to i8* 6947 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 6948 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* 6949 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0 6950 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]], i64 0, i64 0 6951 // CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8 6952 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> 6953 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0 6954 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL1]], i64 0, i64 1 6955 // CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8 6956 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> 6957 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0 6958 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL3]], i64 0, i64 2 6959 // CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8 6960 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8> 6961 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> 6962 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> 6963 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double> 6964 // CHECK: call void @llvm.aarch64.neon.st3lane.v1f64.p0i8(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], i64 0, i8* [[TMP2]]) 6965 // CHECK: ret void 6966 void test_vst3_lane_f64(float64_t *a, float64x1x3_t b) { 6967 vst3_lane_f64(a, b, 0); 6968 } 6969 6970 // CHECK-LABEL: define void @test_vst3_lane_p8(i8* %a, [3 x <8 x i8>] %b.coerce) #0 { 6971 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8 6972 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8 6973 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0 6974 // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 6975 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8* 6976 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8* 6977 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 6978 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 6979 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 6980 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 6981 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 6982 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1 6983 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 6984 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 6985 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2 6986 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 6987 // CHECK: call void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i64 7, i8* %a) 6988 // CHECK: ret void 6989 void test_vst3_lane_p8(poly8_t *a, poly8x8x3_t b) { 6990 vst3_lane_p8(a, b, 7); 6991 } 6992 6993 // CHECK-LABEL: define void @test_vst3_lane_p16(i16* %a, [3 x <4 x i16>] %b.coerce) #0 { 6994 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8 6995 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8 6996 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0 6997 // CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 6998 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8* 6999 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8* 7000 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 7001 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 7002 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 7003 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i64 0, i64 0 7004 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 7005 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 7006 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 7007 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1 7008 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 7009 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 7010 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 7011 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2 7012 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 7013 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 7014 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 7015 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 7016 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 7017 // CHECK: call void @llvm.aarch64.neon.st3lane.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i64 3, i8* [[TMP2]]) 7018 // CHECK: ret void 7019 void test_vst3_lane_p16(poly16_t *a, poly16x4x3_t b) { 7020 vst3_lane_p16(a, b, 3); 7021 } 7022 7023 // CHECK-LABEL: define void @test_vst3_lane_p64(i64* %a, [3 x <1 x i64>] %b.coerce) #0 { 7024 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8 7025 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8 7026 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[B]], i32 0, i32 0 7027 // CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 7028 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8* 7029 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x3_t* [[B]] to i8* 7030 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false) 7031 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 7032 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0 7033 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i64 0, i64 0 7034 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 7035 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 7036 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0 7037 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1 7038 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 7039 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 7040 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0 7041 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2 7042 // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 7043 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> 7044 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 7045 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 7046 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> 7047 // CHECK: call void @llvm.aarch64.neon.st3lane.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64 0, i8* [[TMP2]]) 7048 // CHECK: ret void 7049 void test_vst3_lane_p64(poly64_t *a, poly64x1x3_t b) { 7050 vst3_lane_p64(a, b, 0); 7051 } 7052 7053 // CHECK-LABEL: define void @test_vst4q_lane_u8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 { 7054 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16 7055 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16 7056 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0 7057 // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 7058 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8* 7059 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8* 7060 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 7061 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 7062 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 7063 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 7064 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 7065 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1 7066 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 7067 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 7068 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2 7069 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 7070 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 7071 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3 7072 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16 7073 // CHECK: call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i64 15, i8* %a) 7074 // CHECK: ret void 7075 void test_vst4q_lane_u8(uint8_t *a, uint8x16x4_t b) { 7076 vst4q_lane_u8(a, b, 15); 7077 } 7078 7079 // CHECK-LABEL: define void @test_vst4q_lane_u16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 { 7080 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16 7081 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16 7082 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0 7083 // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 7084 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8* 7085 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8* 7086 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 7087 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 7088 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 7089 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0 7090 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 7091 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 7092 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 7093 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1 7094 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 7095 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 7096 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 7097 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2 7098 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 7099 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 7100 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 7101 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3 7102 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 7103 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> 7104 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 7105 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 7106 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 7107 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> 7108 // CHECK: call void @llvm.aarch64.neon.st4lane.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i64 7, i8* [[TMP2]]) 7109 // CHECK: ret void 7110 void test_vst4q_lane_u16(uint16_t *a, uint16x8x4_t b) { 7111 vst4q_lane_u16(a, b, 7); 7112 } 7113 7114 // CHECK-LABEL: define void @test_vst4q_lane_u32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 { 7115 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16 7116 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16 7117 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0 7118 // CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16 7119 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8* 7120 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8* 7121 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 7122 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 7123 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 7124 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0 7125 // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 7126 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> 7127 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 7128 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1 7129 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 7130 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 7131 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 7132 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2 7133 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 7134 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> 7135 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 7136 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3 7137 // CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16 7138 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8> 7139 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> 7140 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 7141 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> 7142 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32> 7143 // CHECK: call void @llvm.aarch64.neon.st4lane.v4i32.p0i8(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i64 3, i8* [[TMP2]]) 7144 // CHECK: ret void 7145 void test_vst4q_lane_u32(uint32_t *a, uint32x4x4_t b) { 7146 vst4q_lane_u32(a, b, 3); 7147 } 7148 7149 // CHECK-LABEL: define void @test_vst4q_lane_u64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 { 7150 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16 7151 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16 7152 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[B]], i32 0, i32 0 7153 // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 7154 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8* 7155 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8* 7156 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 7157 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 7158 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0 7159 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0 7160 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 7161 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 7162 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0 7163 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1 7164 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 7165 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 7166 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0 7167 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2 7168 // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 7169 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> 7170 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0 7171 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3 7172 // CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16 7173 // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8> 7174 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 7175 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 7176 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> 7177 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64> 7178 // CHECK: call void @llvm.aarch64.neon.st4lane.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64 1, i8* [[TMP2]]) 7179 // CHECK: ret void 7180 void test_vst4q_lane_u64(uint64_t *a, uint64x2x4_t b) { 7181 vst4q_lane_u64(a, b, 1); 7182 } 7183 7184 // CHECK-LABEL: define void @test_vst4q_lane_s8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 { 7185 // CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16 7186 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16 7187 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0 7188 // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 7189 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8* 7190 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8* 7191 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 7192 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 7193 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 7194 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 7195 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 7196 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1 7197 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 7198 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 7199 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2 7200 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 7201 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 7202 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3 7203 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16 7204 // CHECK: call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i64 15, i8* %a) 7205 // CHECK: ret void 7206 void test_vst4q_lane_s8(int8_t *a, int8x16x4_t b) { 7207 vst4q_lane_s8(a, b, 15); 7208 } 7209 7210 // CHECK-LABEL: define void @test_vst4q_lane_s16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 { 7211 // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16 7212 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16 7213 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0 7214 // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 7215 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8* 7216 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8* 7217 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 7218 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 7219 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 7220 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0 7221 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 7222 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 7223 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 7224 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1 7225 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 7226 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 7227 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 7228 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2 7229 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 7230 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 7231 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 7232 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3 7233 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 7234 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> 7235 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 7236 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 7237 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 7238 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> 7239 // CHECK: call void @llvm.aarch64.neon.st4lane.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i64 7, i8* [[TMP2]]) 7240 // CHECK: ret void 7241 void test_vst4q_lane_s16(int16_t *a, int16x8x4_t b) { 7242 vst4q_lane_s16(a, b, 7); 7243 } 7244 7245 // CHECK-LABEL: define void @test_vst4q_lane_s32(i32* %a, [4 x <4 x i32>] %b.coerce) #0 { 7246 // CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16 7247 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16 7248 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0 7249 // CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16 7250 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8* 7251 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8* 7252 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 7253 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 7254 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 7255 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i64 0, i64 0 7256 // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 7257 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> 7258 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 7259 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1 7260 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 7261 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 7262 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 7263 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2 7264 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 7265 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> 7266 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 7267 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3 7268 // CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16 7269 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8> 7270 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> 7271 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 7272 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> 7273 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32> 7274 // CHECK: call void @llvm.aarch64.neon.st4lane.v4i32.p0i8(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i64 3, i8* [[TMP2]]) 7275 // CHECK: ret void 7276 void test_vst4q_lane_s32(int32_t *a, int32x4x4_t b) { 7277 vst4q_lane_s32(a, b, 3); 7278 } 7279 7280 // CHECK-LABEL: define void @test_vst4q_lane_s64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 { 7281 // CHECK: [[B:%.*]] = alloca %struct.int64x2x4_t, align 16 7282 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16 7283 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[B]], i32 0, i32 0 7284 // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 7285 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8* 7286 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8* 7287 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 7288 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 7289 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0 7290 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0 7291 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 7292 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 7293 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0 7294 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1 7295 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 7296 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 7297 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0 7298 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2 7299 // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 7300 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> 7301 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0 7302 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3 7303 // CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16 7304 // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8> 7305 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 7306 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 7307 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> 7308 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64> 7309 // CHECK: call void @llvm.aarch64.neon.st4lane.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64 1, i8* [[TMP2]]) 7310 // CHECK: ret void 7311 void test_vst4q_lane_s64(int64_t *a, int64x2x4_t b) { 7312 vst4q_lane_s64(a, b, 1); 7313 } 7314 7315 // CHECK-LABEL: define void @test_vst4q_lane_f16(half* %a, [4 x <8 x half>] %b.coerce) #0 { 7316 // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16 7317 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16 7318 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0 7319 // CHECK: store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16 7320 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8* 7321 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8* 7322 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 7323 // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* 7324 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 7325 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i64 0, i64 0 7326 // CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 7327 // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8> 7328 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 7329 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i64 0, i64 1 7330 // CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 7331 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> 7332 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 7333 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i64 0, i64 2 7334 // CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16 7335 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8> 7336 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 7337 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3 7338 // CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16 7339 // CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8> 7340 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 7341 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 7342 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 7343 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> 7344 // CHECK: call void @llvm.aarch64.neon.st4lane.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i64 7, i8* [[TMP2]]) 7345 // CHECK: ret void 7346 void test_vst4q_lane_f16(float16_t *a, float16x8x4_t b) { 7347 vst4q_lane_f16(a, b, 7); 7348 } 7349 7350 // CHECK-LABEL: define void @test_vst4q_lane_f32(float* %a, [4 x <4 x float>] %b.coerce) #0 { 7351 // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16 7352 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16 7353 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0 7354 // CHECK: store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16 7355 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8* 7356 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8* 7357 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 7358 // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* 7359 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 7360 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i64 0, i64 0 7361 // CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 7362 // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8> 7363 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 7364 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i64 0, i64 1 7365 // CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 7366 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> 7367 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 7368 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i64 0, i64 2 7369 // CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16 7370 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8> 7371 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 7372 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i64 0, i64 3 7373 // CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16 7374 // CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8> 7375 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float> 7376 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> 7377 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float> 7378 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float> 7379 // CHECK: call void @llvm.aarch64.neon.st4lane.v4f32.p0i8(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], i64 3, i8* [[TMP2]]) 7380 // CHECK: ret void 7381 void test_vst4q_lane_f32(float32_t *a, float32x4x4_t b) { 7382 vst4q_lane_f32(a, b, 3); 7383 } 7384 7385 // CHECK-LABEL: define void @test_vst4q_lane_f64(double* %a, [4 x <2 x double>] %b.coerce) #0 { 7386 // CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16 7387 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16 7388 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0 7389 // CHECK: store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16 7390 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8* 7391 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x2x4_t* [[B]] to i8* 7392 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 7393 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* 7394 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0 7395 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]], i64 0, i64 0 7396 // CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16 7397 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8> 7398 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0 7399 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL1]], i64 0, i64 1 7400 // CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16 7401 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8> 7402 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0 7403 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL3]], i64 0, i64 2 7404 // CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16 7405 // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8> 7406 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0 7407 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL5]], i64 0, i64 3 7408 // CHECK: [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX6]], align 16 7409 // CHECK: [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8> 7410 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double> 7411 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double> 7412 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double> 7413 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double> 7414 // CHECK: call void @llvm.aarch64.neon.st4lane.v2f64.p0i8(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], i64 1, i8* [[TMP2]]) 7415 // CHECK: ret void 7416 void test_vst4q_lane_f64(float64_t *a, float64x2x4_t b) { 7417 vst4q_lane_f64(a, b, 1); 7418 } 7419 7420 // CHECK-LABEL: define void @test_vst4q_lane_p8(i8* %a, [4 x <16 x i8>] %b.coerce) #0 { 7421 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16 7422 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16 7423 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0 7424 // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 7425 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8* 7426 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8* 7427 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 7428 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 7429 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 7430 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 7431 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 7432 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1 7433 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 7434 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 7435 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2 7436 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 7437 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 7438 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3 7439 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16 7440 // CHECK: call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i64 15, i8* %a) 7441 // CHECK: ret void 7442 void test_vst4q_lane_p8(poly8_t *a, poly8x16x4_t b) { 7443 vst4q_lane_p8(a, b, 15); 7444 } 7445 7446 // CHECK-LABEL: define void @test_vst4q_lane_p16(i16* %a, [4 x <8 x i16>] %b.coerce) #0 { 7447 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16 7448 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16 7449 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0 7450 // CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 7451 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8* 7452 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8* 7453 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 7454 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 7455 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 7456 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i64 0, i64 0 7457 // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 7458 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 7459 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 7460 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1 7461 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 7462 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 7463 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 7464 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2 7465 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 7466 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 7467 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 7468 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3 7469 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 7470 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> 7471 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 7472 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 7473 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 7474 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> 7475 // CHECK: call void @llvm.aarch64.neon.st4lane.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i64 7, i8* [[TMP2]]) 7476 // CHECK: ret void 7477 void test_vst4q_lane_p16(poly16_t *a, poly16x8x4_t b) { 7478 vst4q_lane_p16(a, b, 7); 7479 } 7480 7481 // CHECK-LABEL: define void @test_vst4q_lane_p64(i64* %a, [4 x <2 x i64>] %b.coerce) #0 { 7482 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16 7483 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16 7484 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[B]], i32 0, i32 0 7485 // CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 7486 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8* 7487 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x4_t* [[B]] to i8* 7488 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false) 7489 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 7490 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0 7491 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], i64 0, i64 0 7492 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 7493 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 7494 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0 7495 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1 7496 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 7497 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 7498 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0 7499 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2 7500 // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 7501 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> 7502 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0 7503 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3 7504 // CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16 7505 // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8> 7506 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 7507 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 7508 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> 7509 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64> 7510 // CHECK: call void @llvm.aarch64.neon.st4lane.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64 1, i8* [[TMP2]]) 7511 // CHECK: ret void 7512 void test_vst4q_lane_p64(poly64_t *a, poly64x2x4_t b) { 7513 vst4q_lane_p64(a, b, 1); 7514 } 7515 7516 // CHECK-LABEL: define void @test_vst4_lane_u8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 { 7517 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8 7518 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8 7519 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0 7520 // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 7521 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8* 7522 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8* 7523 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 7524 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 7525 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 7526 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 7527 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 7528 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1 7529 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 7530 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 7531 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2 7532 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 7533 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 7534 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3 7535 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 7536 // CHECK: call void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i64 7, i8* %a) 7537 // CHECK: ret void 7538 void test_vst4_lane_u8(uint8_t *a, uint8x8x4_t b) { 7539 vst4_lane_u8(a, b, 7); 7540 } 7541 7542 // CHECK-LABEL: define void @test_vst4_lane_u16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 { 7543 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8 7544 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8 7545 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0 7546 // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 7547 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8* 7548 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8* 7549 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 7550 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 7551 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 7552 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0 7553 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 7554 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 7555 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 7556 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1 7557 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 7558 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 7559 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 7560 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2 7561 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 7562 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 7563 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 7564 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3 7565 // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 7566 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8> 7567 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 7568 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 7569 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 7570 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> 7571 // CHECK: call void @llvm.aarch64.neon.st4lane.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i64 3, i8* [[TMP2]]) 7572 // CHECK: ret void 7573 void test_vst4_lane_u16(uint16_t *a, uint16x4x4_t b) { 7574 vst4_lane_u16(a, b, 3); 7575 } 7576 7577 // CHECK-LABEL: define void @test_vst4_lane_u32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 { 7578 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8 7579 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8 7580 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0 7581 // CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8 7582 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8* 7583 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8* 7584 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 7585 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 7586 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 7587 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0 7588 // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 7589 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> 7590 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 7591 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1 7592 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 7593 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 7594 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 7595 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2 7596 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 7597 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> 7598 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 7599 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3 7600 // CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8 7601 // CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8> 7602 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> 7603 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 7604 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> 7605 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32> 7606 // CHECK: call void @llvm.aarch64.neon.st4lane.v2i32.p0i8(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i64 1, i8* [[TMP2]]) 7607 // CHECK: ret void 7608 void test_vst4_lane_u32(uint32_t *a, uint32x2x4_t b) { 7609 vst4_lane_u32(a, b, 1); 7610 } 7611 7612 // CHECK-LABEL: define void @test_vst4_lane_u64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 { 7613 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8 7614 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8 7615 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0 7616 // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 7617 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8* 7618 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8* 7619 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 7620 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 7621 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 7622 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0 7623 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 7624 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 7625 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 7626 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1 7627 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 7628 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 7629 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 7630 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2 7631 // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 7632 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> 7633 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 7634 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3 7635 // CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8 7636 // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> 7637 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 7638 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 7639 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> 7640 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64> 7641 // CHECK: call void @llvm.aarch64.neon.st4lane.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64 0, i8* [[TMP2]]) 7642 // CHECK: ret void 7643 void test_vst4_lane_u64(uint64_t *a, uint64x1x4_t b) { 7644 vst4_lane_u64(a, b, 0); 7645 } 7646 7647 // CHECK-LABEL: define void @test_vst4_lane_s8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 { 7648 // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8 7649 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8 7650 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0 7651 // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 7652 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8* 7653 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8* 7654 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 7655 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 7656 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 7657 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 7658 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 7659 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1 7660 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 7661 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 7662 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2 7663 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 7664 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 7665 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3 7666 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 7667 // CHECK: call void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i64 7, i8* %a) 7668 // CHECK: ret void 7669 void test_vst4_lane_s8(int8_t *a, int8x8x4_t b) { 7670 vst4_lane_s8(a, b, 7); 7671 } 7672 7673 // CHECK-LABEL: define void @test_vst4_lane_s16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 { 7674 // CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8 7675 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8 7676 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0 7677 // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 7678 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8* 7679 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8* 7680 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 7681 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 7682 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 7683 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0 7684 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 7685 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 7686 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 7687 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1 7688 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 7689 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 7690 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 7691 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2 7692 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 7693 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 7694 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 7695 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3 7696 // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 7697 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8> 7698 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 7699 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 7700 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 7701 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> 7702 // CHECK: call void @llvm.aarch64.neon.st4lane.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i64 3, i8* [[TMP2]]) 7703 // CHECK: ret void 7704 void test_vst4_lane_s16(int16_t *a, int16x4x4_t b) { 7705 vst4_lane_s16(a, b, 3); 7706 } 7707 7708 // CHECK-LABEL: define void @test_vst4_lane_s32(i32* %a, [4 x <2 x i32>] %b.coerce) #0 { 7709 // CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8 7710 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8 7711 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0 7712 // CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8 7713 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8* 7714 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8* 7715 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 7716 // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* 7717 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 7718 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i64 0, i64 0 7719 // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 7720 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> 7721 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 7722 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1 7723 // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 7724 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 7725 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 7726 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2 7727 // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 7728 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> 7729 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 7730 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3 7731 // CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8 7732 // CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8> 7733 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> 7734 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 7735 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> 7736 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32> 7737 // CHECK: call void @llvm.aarch64.neon.st4lane.v2i32.p0i8(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i64 1, i8* [[TMP2]]) 7738 // CHECK: ret void 7739 void test_vst4_lane_s32(int32_t *a, int32x2x4_t b) { 7740 vst4_lane_s32(a, b, 1); 7741 } 7742 7743 // CHECK-LABEL: define void @test_vst4_lane_s64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 { 7744 // CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8 7745 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8 7746 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0 7747 // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 7748 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8* 7749 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8* 7750 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 7751 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 7752 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 7753 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0 7754 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 7755 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 7756 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 7757 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1 7758 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 7759 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 7760 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 7761 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2 7762 // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 7763 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> 7764 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 7765 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3 7766 // CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8 7767 // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> 7768 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 7769 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 7770 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> 7771 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64> 7772 // CHECK: call void @llvm.aarch64.neon.st4lane.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64 0, i8* [[TMP2]]) 7773 // CHECK: ret void 7774 void test_vst4_lane_s64(int64_t *a, int64x1x4_t b) { 7775 vst4_lane_s64(a, b, 0); 7776 } 7777 7778 // CHECK-LABEL: define void @test_vst4_lane_f16(half* %a, [4 x <4 x half>] %b.coerce) #0 { 7779 // CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8 7780 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8 7781 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0 7782 // CHECK: store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8 7783 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8* 7784 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8* 7785 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 7786 // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* 7787 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 7788 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i64 0, i64 0 7789 // CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8 7790 // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8> 7791 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 7792 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i64 0, i64 1 7793 // CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 7794 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> 7795 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 7796 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i64 0, i64 2 7797 // CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8 7798 // CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8> 7799 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 7800 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i64 0, i64 3 7801 // CHECK: [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8 7802 // CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8> 7803 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 7804 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 7805 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 7806 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> 7807 // CHECK: call void @llvm.aarch64.neon.st4lane.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i64 3, i8* [[TMP2]]) 7808 // CHECK: ret void 7809 void test_vst4_lane_f16(float16_t *a, float16x4x4_t b) { 7810 vst4_lane_f16(a, b, 3); 7811 } 7812 7813 // CHECK-LABEL: define void @test_vst4_lane_f32(float* %a, [4 x <2 x float>] %b.coerce) #0 { 7814 // CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8 7815 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8 7816 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0 7817 // CHECK: store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8 7818 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8* 7819 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8* 7820 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 7821 // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* 7822 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 7823 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i64 0, i64 0 7824 // CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 7825 // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8> 7826 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 7827 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i64 0, i64 1 7828 // CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 7829 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8> 7830 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 7831 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i64 0, i64 2 7832 // CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8 7833 // CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8> 7834 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 7835 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i64 0, i64 3 7836 // CHECK: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8 7837 // CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8> 7838 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float> 7839 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> 7840 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float> 7841 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float> 7842 // CHECK: call void @llvm.aarch64.neon.st4lane.v2f32.p0i8(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], i64 1, i8* [[TMP2]]) 7843 // CHECK: ret void 7844 void test_vst4_lane_f32(float32_t *a, float32x2x4_t b) { 7845 vst4_lane_f32(a, b, 1); 7846 } 7847 7848 // CHECK-LABEL: define void @test_vst4_lane_f64(double* %a, [4 x <1 x double>] %b.coerce) #0 { 7849 // CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8 7850 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8 7851 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0 7852 // CHECK: store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8 7853 // CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8* 7854 // CHECK: [[TMP1:%.*]] = bitcast %struct.float64x1x4_t* [[B]] to i8* 7855 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 7856 // CHECK: [[TMP2:%.*]] = bitcast double* %a to i8* 7857 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 7858 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]], i64 0, i64 0 7859 // CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8 7860 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> 7861 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 7862 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL1]], i64 0, i64 1 7863 // CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8 7864 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> 7865 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 7866 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL3]], i64 0, i64 2 7867 // CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8 7868 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8> 7869 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 7870 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL5]], i64 0, i64 3 7871 // CHECK: [[TMP9:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX6]], align 8 7872 // CHECK: [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8> 7873 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> 7874 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> 7875 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double> 7876 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double> 7877 // CHECK: call void @llvm.aarch64.neon.st4lane.v1f64.p0i8(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], i64 0, i8* [[TMP2]]) 7878 // CHECK: ret void 7879 void test_vst4_lane_f64(float64_t *a, float64x1x4_t b) { 7880 vst4_lane_f64(a, b, 0); 7881 } 7882 7883 // CHECK-LABEL: define void @test_vst4_lane_p8(i8* %a, [4 x <8 x i8>] %b.coerce) #0 { 7884 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8 7885 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8 7886 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0 7887 // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 7888 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8* 7889 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8* 7890 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 7891 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 7892 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 7893 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 7894 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 7895 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1 7896 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 7897 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 7898 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2 7899 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 7900 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 7901 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3 7902 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 7903 // CHECK: call void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i64 7, i8* %a) 7904 // CHECK: ret void 7905 void test_vst4_lane_p8(poly8_t *a, poly8x8x4_t b) { 7906 vst4_lane_p8(a, b, 7); 7907 } 7908 7909 // CHECK-LABEL: define void @test_vst4_lane_p16(i16* %a, [4 x <4 x i16>] %b.coerce) #0 { 7910 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8 7911 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8 7912 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0 7913 // CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 7914 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8* 7915 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8* 7916 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 7917 // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* 7918 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 7919 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i64 0, i64 0 7920 // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 7921 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 7922 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 7923 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1 7924 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 7925 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 7926 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 7927 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2 7928 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 7929 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 7930 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 7931 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3 7932 // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 7933 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8> 7934 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 7935 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 7936 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 7937 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> 7938 // CHECK: call void @llvm.aarch64.neon.st4lane.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i64 3, i8* [[TMP2]]) 7939 // CHECK: ret void 7940 void test_vst4_lane_p16(poly16_t *a, poly16x4x4_t b) { 7941 vst4_lane_p16(a, b, 3); 7942 } 7943 7944 // CHECK-LABEL: define void @test_vst4_lane_p64(i64* %a, [4 x <1 x i64>] %b.coerce) #0 { 7945 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8 7946 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8 7947 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[B]], i32 0, i32 0 7948 // CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 7949 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8* 7950 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x4_t* [[B]] to i8* 7951 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false) 7952 // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* 7953 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0 7954 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i64 0, i64 0 7955 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 7956 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 7957 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0 7958 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1 7959 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 7960 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 7961 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0 7962 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2 7963 // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 7964 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> 7965 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0 7966 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3 7967 // CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8 7968 // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> 7969 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 7970 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 7971 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> 7972 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64> 7973 // CHECK: call void @llvm.aarch64.neon.st4lane.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64 0, i8* [[TMP2]]) 7974 // CHECK: ret void 7975 void test_vst4_lane_p64(poly64_t *a, poly64x1x4_t b) { 7976 vst4_lane_p64(a, b, 0); 7977 } 7978