// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN:  -ffp-contract=fast -emit-llvm -o - %s | opt -S -mem2reg \
// RUN:  | FileCheck %s

// Test new aarch64 intrinsics with poly64

#include <arm_neon.h>

// CHECK-LABEL: define <1 x i64> @test_vceq_p64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_p64(poly64x1_t a, poly64x1_t b) {
  return vceq_p64(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vceqq_p64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vceqq_p64(poly64x2_t a, poly64x2_t b) {
  return vceqq_p64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vtst_p64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[TMP4:%.*]] = and <1 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <1 x i64> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP5]] to <1 x i64>
// CHECK: ret <1 x i64> [[VTST_I]]
uint64x1_t test_vtst_p64(poly64x1_t a, poly64x1_t b) {
  return vtst_p64(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vtstq_p64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[TMP4:%.*]] = and <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
// CHECK: ret <2 x i64> [[VTST_I]]
uint64x2_t test_vtstq_p64(poly64x2_t a, poly64x2_t b) {
  return vtstq_p64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vbsl_p64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <1 x i64> [[VBSL_I]], <i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <1 x i64> [[VBSL5_I]]
poly64x1_t test_vbsl_p64(poly64x1_t a, poly64x1_t b, poly64x1_t c) {
  return vbsl_p64(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vbslq_p64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8>
// CHECK: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> [[VBSL_I]], [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <2 x i64> [[VBSL_I]], <i64 -1, i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <2 x i64> [[VBSL5_I]]
poly64x2_t test_vbslq_p64(poly64x2_t a, poly64x2_t b, poly64x2_t c) {
  return vbslq_p64(a, b, c);
}

// CHECK-LABEL: define i64 @test_vget_lane_p64(<1 x i64> %v) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK: ret i64 [[VGET_LANE]]
poly64_t test_vget_lane_p64(poly64x1_t v) {
  return vget_lane_p64(v, 0);
}

// CHECK-LABEL: define i64 @test_vgetq_lane_p64(<2 x i64> %v) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK: ret i64 [[VGETQ_LANE]]
poly64_t test_vgetq_lane_p64(poly64x2_t v) {
  return vgetq_lane_p64(v, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vset_lane_p64(i64 %a, <1 x i64> %v) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0
// CHECK: ret <1 x i64> [[VSET_LANE]]
poly64x1_t test_vset_lane_p64(poly64_t a, poly64x1_t v) {
  return vset_lane_p64(a, v, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_p64(i64 %a, <2 x i64> %v) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
// CHECK: ret <2 x i64> [[VSET_LANE]]
poly64x2_t test_vsetq_lane_p64(poly64_t a, poly64x2_t v) {
  return vsetq_lane_p64(a, v, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vcopy_lane_p64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP3]], i64 [[VGET_LANE]], i32 0
// CHECK: ret <1 x i64> [[VSET_LANE]]
poly64x1_t test_vcopy_lane_p64(poly64x1_t a, poly64x1_t b) {
  return vcopy_lane_p64(a, 0, b, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[VGET_LANE]], i32 1
// CHECK: ret <2 x i64> [[VSET_LANE]]
poly64x2_t test_vcopyq_lane_p64(poly64x2_t a, poly64x1_t b) {
  return vcopyq_lane_p64(a, 1, b, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vcopyq_laneq_p64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[VGETQ_LANE]], i32 1
// CHECK: ret <2 x i64> [[VSET_LANE]]
poly64x2_t test_vcopyq_laneq_p64(poly64x2_t a, poly64x2_t b) {
  return vcopyq_laneq_p64(a, 1, b, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vcreate_p64(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vcreate_p64(uint64_t a) {
  return vcreate_p64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vdup_n_p64(i64 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
// CHECK: ret <1 x i64> [[VECINIT_I]]
poly64x1_t test_vdup_n_p64(poly64_t a) {
  return vdup_n_p64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vdupq_n_p64(i64 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK: ret <2 x i64> [[VECINIT1_I]]
poly64x2_t test_vdupq_n_p64(poly64_t a) {
  return vdupq_n_p64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vmov_n_p64(i64 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
// CHECK: ret <1 x i64> [[VECINIT_I]]
poly64x1_t test_vmov_n_p64(poly64_t a) {
  return vmov_n_p64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vmovq_n_p64(i64 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK: ret <2 x i64> [[VECINIT1_I]]
poly64x2_t test_vmovq_n_p64(poly64_t a) {
  return vmovq_n_p64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vdup_lane_p64(<1 x i64> %vec) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %vec, <1 x i64> %vec, <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[SHUFFLE]]
poly64x1_t test_vdup_lane_p64(poly64x1_t vec) {
  return vdup_lane_p64(vec, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vdupq_lane_p64(<1 x i64> %vec) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %vec, <1 x i64> %vec, <2 x i32> zeroinitializer
// CHECK: ret <2 x i64> [[SHUFFLE]]
poly64x2_t test_vdupq_lane_p64(poly64x1_t vec) {
  return vdupq_lane_p64(vec, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vdupq_laneq_p64(<2 x i64> %vec) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i64> %vec, <2 x i64> %vec, <2 x i32> <i32 1, i32 1>
// CHECK: ret <2 x i64> [[SHUFFLE]]
poly64x2_t test_vdupq_laneq_p64(poly64x2_t vec) {
  return vdupq_laneq_p64(vec, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vcombine_p64(<1 x i64> %low, <1 x i64> %high) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %low, <1 x i64> %high, <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
poly64x2_t test_vcombine_p64(poly64x1_t low, poly64x1_t high) {
  return vcombine_p64(low, high);
}

// CHECK-LABEL: define <1 x i64> @test_vld1_p64(i64* %ptr) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
// CHECK: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
// CHECK: ret <1 x i64> [[TMP2]]
poly64x1_t test_vld1_p64(poly64_t const * ptr) {
  return vld1_p64(ptr);
}

// CHECK-LABEL: define <2 x i64> @test_vld1q_p64(i64* %ptr) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
// CHECK: ret <2 x i64> [[TMP2]]
poly64x2_t test_vld1q_p64(poly64_t const * ptr) {
  return vld1q_p64(ptr);
}

// CHECK-LABEL: define void @test_vst1_p64(i64* %ptr, <1 x i64> %val) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %val to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: store <1 x i64> [[TMP3]], <1 x i64>* [[TMP2]]
// CHECK: ret void
void test_vst1_p64(poly64_t * ptr, poly64x1_t val) {
  return vst1_p64(ptr, val);
}

// CHECK-LABEL: define void @test_vst1q_p64(i64* %ptr, <2 x i64> %val) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %val to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP2]]
// CHECK: ret void
void test_vst1q_p64(poly64_t * ptr, poly64x2_t val) {
  return vst1q_p64(ptr, val);
}

// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld2_p64(i64* %ptr) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 16, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8
// CHECK: ret %struct.poly64x1x2_t [[TMP6]]
poly64x1x2_t test_vld2_p64(poly64_t const * ptr) {
  return vld2_p64(ptr);
}

// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld2q_p64(i64* %ptr) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
// CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16
// CHECK: ret %struct.poly64x2x2_t [[TMP6]]
poly64x2x2_t test_vld2q_p64(poly64_t const * ptr) {
  return vld2q_p64(ptr);
}

// CHECK-LABEL: define %struct.poly64x1x3_t @test_vld3_p64(i64* %ptr) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 24, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[RETVAL]], align 8
// CHECK: ret %struct.poly64x1x3_t [[TMP6]]
poly64x1x3_t test_vld3_p64(poly64_t const * ptr) {
  return vld3_p64(ptr);
}

// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld3q_p64(i64* %ptr) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 48, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16
// CHECK: ret %struct.poly64x2x3_t [[TMP6]]
poly64x2x3_t test_vld3q_p64(poly64_t const * ptr) {
  return vld3q_p64(ptr);
}

// CHECK-LABEL: define %struct.poly64x1x4_t @test_vld4_p64(i64* %ptr) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
// CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 32, i32 8, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[RETVAL]], align 8
// CHECK: ret %struct.poly64x1x4_t [[TMP6]]
poly64x1x4_t test_vld4_p64(poly64_t const * ptr) {
  return vld4_p64(ptr);
}

// CHECK-LABEL: define %struct.poly64x2x4_t @test_vld4q_p64(i64* %ptr) #0 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
// CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP4]], i8* [[TMP5]], i64 64, i32 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[RETVAL]], align 16
// CHECK: ret %struct.poly64x2x4_t [[TMP6]]
poly64x2x4_t test_vld4q_p64(poly64_t const * ptr) {
  return vld4q_p64(ptr);
}

// CHECK-LABEL: define void @test_vst2_p64(i64* %ptr, [2 x <1 x i64>] %val.coerce) #0 {
// CHECK: [[VAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[VAL]], i32 0, i32 0
// CHECK: store [2 x <1 x i64>] [[VAL]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x2_t* [[VAL]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL2]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX3]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
void test_vst2_p64(poly64_t * ptr, poly64x1x2_t val) {
  return vst2_p64(ptr, val);
}

// CHECK-LABEL: define void @test_vst2q_p64(i64* %ptr, [2 x <2 x i64>] %val.coerce) #0 {
// CHECK: [[VAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[VAL]], i32 0, i32 0
// CHECK: store [2 x <2 x i64>] [[VAL]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x2_t* [[VAL]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL2]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX3]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: call void @llvm.aarch64.neon.st2.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i8* [[TMP2]])
// CHECK: ret void
void test_vst2q_p64(poly64_t * ptr, poly64x2x2_t val) {
  return vst2q_p64(ptr, val);
}

// CHECK-LABEL: define void @test_vst3_p64(i64* %ptr, [3 x <1 x i64>] %val.coerce) #0 {
// CHECK: [[VAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[VAL]], i32 0, i32 0
// CHECK: store [3 x <1 x i64>] [[VAL]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x3_t* [[VAL]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL2]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX3]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL4:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL4]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX5]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3_p64(poly64_t * ptr, poly64x1x3_t val) {
  return vst3_p64(ptr, val);
}

// CHECK-LABEL: define void @test_vst3q_p64(i64* %ptr, [3 x <2 x i64>] %val.coerce) #0 {
// CHECK: [[VAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[VAL]], i32 0, i32 0
// CHECK: store [3 x <2 x i64>] [[VAL]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x3_t* [[VAL]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL2]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX3]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[VAL4:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL4]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX5]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3q_p64(poly64_t * ptr, poly64x2x3_t val) {
  return vst3q_p64(ptr, val);
}

// CHECK-LABEL: define void @test_vst4_p64(i64* %ptr, [4 x <1 x i64>] %val.coerce) #0 {
// CHECK: [[VAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[VAL]], i32 0, i32 0
// CHECK: store [4 x <1 x i64>] [[VAL]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x4_t* [[VAL]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL2]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX3]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL4:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL4]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX5]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[VAL6:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX7:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL6]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX7]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
// CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4_p64(poly64_t * ptr, poly64x1x4_t val) {
  return vst4_p64(ptr, val);
}

// CHECK-LABEL: define void @test_vst4q_p64(i64* %ptr, [4 x <2 x i64>] %val.coerce) #0 {
// CHECK: [[VAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[VAL]], i32 0, i32 0
// CHECK: store [4 x <2 x i64>] [[VAL]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x4_t* [[VAL]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL2]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX3]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[VAL4:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL4]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX5]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK: [[VAL6:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX7:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL6]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX7]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
// CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_p64(poly64_t * ptr, poly64x2x4_t val) {
  return vst4q_p64(ptr, val);
}

// CHECK-LABEL: define <1 x i64> @test_vext_p64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[VEXT]]
poly64x1_t test_vext_p64(poly64x1_t a, poly64x1_t b) {
  return vext_u64(a, b, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vextq_p64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK: ret <2 x i64> [[VEXT]]
poly64x2_t test_vextq_p64(poly64x2_t a, poly64x2_t b) {
  return vextq_p64(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vzip1q_p64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
poly64x2_t test_vzip1q_p64(poly64x2_t a, poly64x2_t b) {
  return vzip1q_p64(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vzip2q_p64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
poly64x2_t test_vzip2q_p64(poly64x2_t a, poly64x2_t b) {
  return vzip2q_u64(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vuzp1q_p64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
poly64x2_t test_vuzp1q_p64(poly64x2_t a, poly64x2_t b) {
  return vuzp1q_p64(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vuzp2q_p64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
poly64x2_t test_vuzp2q_p64(poly64x2_t a, poly64x2_t b) {
  return vuzp2q_u64(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vtrn1q_p64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
poly64x2_t test_vtrn1q_p64(poly64x2_t a, poly64x2_t b) {
  return vtrn1q_p64(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vtrn2q_p64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
poly64x2_t test_vtrn2q_p64(poly64x2_t a, poly64x2_t b) {
  return vtrn2q_u64(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vsri_n_p64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 33)
// CHECK: ret <1 x i64> [[VSRI_N2]]
poly64x1_t test_vsri_n_p64(poly64x1_t a, poly64x1_t b) {
  return vsri_n_p64(a, b, 33);
}

// CHECK-LABEL: define <2 x i64> @test_vsriq_n_p64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 64)
// CHECK: ret <2 x i64> [[VSRI_N2]]
poly64x2_t test_vsriq_n_p64(poly64x2_t a, poly64x2_t b) {
  return vsriq_n_p64(a, b, 64);
}