; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs -asm-verbose=false | FileCheck %s

%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }

define %struct.__neon_int8x8x2_t @ld2_8b(i8* %A) nounwind {
; CHECK-LABEL: ld2_8b
; Make sure we are loading into the result registers defined by the ABI (i.e., v0, v1)
; and from the function's argument register, also defined by the ABI (i.e., x0).
; CHECK: ld2.8b { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
  ret %struct.__neon_int8x8x2_t %tmp2
}

define %struct.__neon_int8x8x3_t @ld3_8b(i8* %A) nounwind {
; CHECK-LABEL: ld3_8b
; Make sure we are using the operands defined by the ABI
; CHECK: ld3.8b { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A)
  ret %struct.__neon_int8x8x3_t %tmp2
}

define %struct.__neon_int8x8x4_t @ld4_8b(i8* %A) nounwind {
; CHECK-LABEL: ld4_8b
; Make sure we are using the operands defined by the ABI
; CHECK: ld4.8b { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A)
  ret %struct.__neon_int8x8x4_t %tmp2
}

declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3.v8i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0i8(i8*) nounwind readonly

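; Illustrative sketch (deliberately without CHECK lines, so FileCheck ignores
; it): how a front end typically consumes the two-vector aggregate returned by
; the ld2 intrinsic. Each de-interleaved vector is pulled out of the struct
; with extractvalue; field 0 holds the even-indexed elements. The function
; name is hypothetical.
define <8 x i8> @ld2_8b_first_field(i8* %A) nounwind {
  %pair = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
  %even = extractvalue %struct.__neon_int8x8x2_t %pair, 0
  ret <8 x i8> %even
}
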
%struct.__neon_int8x16x2_t = type { <16 x i8>, <16 x i8> }
%struct.__neon_int8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> }
%struct.__neon_int8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }

define %struct.__neon_int8x16x2_t @ld2_16b(i8* %A) nounwind {
; CHECK-LABEL: ld2_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld2.16b { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A)
  ret %struct.__neon_int8x16x2_t %tmp2
}

define %struct.__neon_int8x16x3_t @ld3_16b(i8* %A) nounwind {
; CHECK-LABEL: ld3_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld3.16b { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A)
  ret %struct.__neon_int8x16x3_t %tmp2
}

define %struct.__neon_int8x16x4_t @ld4_16b(i8* %A) nounwind {
; CHECK-LABEL: ld4_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld4.16b { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A)
  ret %struct.__neon_int8x16x4_t %tmp2
}

declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2.v16i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3.v16i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4.v16i8.p0i8(i8*) nounwind readonly

%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }

define %struct.__neon_int16x4x2_t @ld2_4h(i16* %A) nounwind {
; CHECK-LABEL: ld2_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld2.4h { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A)
  ret %struct.__neon_int16x4x2_t %tmp2
}

define %struct.__neon_int16x4x3_t @ld3_4h(i16* %A) nounwind {
; CHECK-LABEL: ld3_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld3.4h { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A)
  ret %struct.__neon_int16x4x3_t %tmp2
}

define %struct.__neon_int16x4x4_t @ld4_4h(i16* %A) nounwind {
; CHECK-LABEL: ld4_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld4.4h { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A)
  ret %struct.__neon_int16x4x4_t %tmp2
}

declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2.v4i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3.v4i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4.v4i16.p0i16(i16*) nounwind readonly

%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }

define %struct.__neon_int16x8x2_t @ld2_8h(i16* %A) nounwind {
; CHECK-LABEL: ld2_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld2.8h { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A)
  ret %struct.__neon_int16x8x2_t %tmp2
}

define %struct.__neon_int16x8x3_t @ld3_8h(i16* %A) nounwind {
; CHECK-LABEL: ld3_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld3.8h { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A)
  ret %struct.__neon_int16x8x3_t %tmp2
}

define %struct.__neon_int16x8x4_t @ld4_8h(i16* %A) nounwind {
; CHECK-LABEL: ld4_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld4.8h { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A)
  ret %struct.__neon_int16x8x4_t %tmp2
}

declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2.v8i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3.v8i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4.v8i16.p0i16(i16*) nounwind readonly

%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }

define %struct.__neon_int32x2x2_t @ld2_2s(i32* %A) nounwind {
; CHECK-LABEL: ld2_2s
; Make sure we are using the operands defined by the ABI
; CHECK: ld2.2s { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A)
  ret %struct.__neon_int32x2x2_t %tmp2
}

define %struct.__neon_int32x2x3_t @ld3_2s(i32* %A) nounwind {
; CHECK-LABEL: ld3_2s
; Make sure we are using the operands defined by the ABI
; CHECK: ld3.2s { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A)
  ret %struct.__neon_int32x2x3_t %tmp2
}

define %struct.__neon_int32x2x4_t @ld4_2s(i32* %A) nounwind {
; CHECK-LABEL: ld4_2s
; Make sure we are using the operands defined by the ABI
; CHECK: ld4.2s { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A)
  ret %struct.__neon_int32x2x4_t %tmp2
}

declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2.v2i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3.v2i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4.v2i32.p0i32(i32*) nounwind readonly

%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }

define %struct.__neon_int32x4x2_t @ld2_4s(i32* %A) nounwind {
; CHECK-LABEL: ld2_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld2.4s { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A)
  ret %struct.__neon_int32x4x2_t %tmp2
}

define %struct.__neon_int32x4x3_t @ld3_4s(i32* %A) nounwind {
; CHECK-LABEL: ld3_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld3.4s { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A)
  ret %struct.__neon_int32x4x3_t %tmp2
}

define %struct.__neon_int32x4x4_t @ld4_4s(i32* %A) nounwind {
; CHECK-LABEL: ld4_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld4.4s { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A)
  ret %struct.__neon_int32x4x4_t %tmp2
}

declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2.v4i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3.v4i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4.v4i32.p0i32(i32*) nounwind readonly

%struct.__neon_int64x2x2_t = type { <2 x i64>, <2 x i64> }
%struct.__neon_int64x2x3_t = type { <2 x i64>, <2 x i64>, <2 x i64> }
%struct.__neon_int64x2x4_t = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }

define %struct.__neon_int64x2x2_t @ld2_2d(i64* %A) nounwind {
; CHECK-LABEL: ld2_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld2.2d { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A)
  ret %struct.__neon_int64x2x2_t %tmp2
}

define %struct.__neon_int64x2x3_t @ld3_2d(i64* %A) nounwind {
; CHECK-LABEL: ld3_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld3.2d { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A)
  ret %struct.__neon_int64x2x3_t %tmp2
}

define %struct.__neon_int64x2x4_t @ld4_2d(i64* %A) nounwind {
; CHECK-LABEL: ld4_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld4.2d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A)
  ret %struct.__neon_int64x2x4_t %tmp2
}

declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2.v2i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3.v2i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4.v2i64.p0i64(i64*) nounwind readonly

%struct.__neon_int64x1x2_t = type { <1 x i64>, <1 x i64> }
%struct.__neon_int64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> }
%struct.__neon_int64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }

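; The single-element vectors below have no lanes to de-interleave, so
; ld2/ld3/ld4 on <1 x i64> (and <1 x double>) are expected to degenerate into
; the multi-register ld1.1d forms.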
define %struct.__neon_int64x1x2_t @ld2_1di64(i64* %A) nounwind {
; CHECK-LABEL: ld2_1di64
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.1d { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A)
  ret %struct.__neon_int64x1x2_t %tmp2
}

define %struct.__neon_int64x1x3_t @ld3_1di64(i64* %A) nounwind {
; CHECK-LABEL: ld3_1di64
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.1d { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A)
  ret %struct.__neon_int64x1x3_t %tmp2
}

define %struct.__neon_int64x1x4_t @ld4_1di64(i64* %A) nounwind {
; CHECK-LABEL: ld4_1di64
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.1d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A)
  ret %struct.__neon_int64x1x4_t %tmp2
}

declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2.v1i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3.v1i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4.v1i64.p0i64(i64*) nounwind readonly

%struct.__neon_float64x1x2_t = type { <1 x double>, <1 x double> }
%struct.__neon_float64x1x3_t = type { <1 x double>, <1 x double>, <1 x double> }
%struct.__neon_float64x1x4_t = type { <1 x double>, <1 x double>, <1 x double>, <1 x double> }

define %struct.__neon_float64x1x2_t @ld2_1df64(double* %A) nounwind {
; CHECK-LABEL: ld2_1df64
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.1d { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A)
  ret %struct.__neon_float64x1x2_t %tmp2
}

define %struct.__neon_float64x1x3_t @ld3_1df64(double* %A) nounwind {
; CHECK-LABEL: ld3_1df64
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.1d { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A)
  ret %struct.__neon_float64x1x3_t %tmp2
}

define %struct.__neon_float64x1x4_t @ld4_1df64(double* %A) nounwind {
; CHECK-LABEL: ld4_1df64
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.1d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A)
  ret %struct.__neon_float64x1x4_t %tmp2
}

declare %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld2.v1f64.p0f64(double*) nounwind readonly
declare %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld3.v1f64.p0f64(double*) nounwind readonly
declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0f64(double*) nounwind readonly

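; ld2/ld3/ld4 with a lane index load one element per register and replace
; only the addressed lane, leaving the other lanes untouched. That is why the
; ldNlane intrinsics below take the existing vectors (%L1, %L2, ...) as
; operands in addition to the lane number and the pointer.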
define %struct.__neon_int8x16x2_t @ld2lane_16b(<16 x i8> %L1, <16 x i8> %L2, i8* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld2lane_16b
; CHECK: ld2.b { v0, v1 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, i64 1, i8* %A)
  ret %struct.__neon_int8x16x2_t %tmp2
}

define %struct.__neon_int8x16x3_t @ld3lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i8* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld3lane_16b
; CHECK: ld3.b { v0, v1, v2 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i64 1, i8* %A)
  ret %struct.__neon_int8x16x3_t %tmp2
}

define %struct.__neon_int8x16x4_t @ld4lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i8* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld4lane_16b
; CHECK: ld4.b { v0, v1, v2, v3 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i64 1, i8* %A)
  ret %struct.__neon_int8x16x4_t %tmp2
}

declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly

define %struct.__neon_int16x8x2_t @ld2lane_8h(<8 x i16> %L1, <8 x i16> %L2, i16* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld2lane_8h
; CHECK: ld2.h { v0, v1 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, i64 1, i16* %A)
  ret %struct.__neon_int16x8x2_t %tmp2
}

define %struct.__neon_int16x8x3_t @ld3lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i16* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld3lane_8h
; CHECK: ld3.h { v0, v1, v2 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i64 1, i16* %A)
  ret %struct.__neon_int16x8x3_t %tmp2
}

define %struct.__neon_int16x8x4_t @ld4lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i16* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld4lane_8h
; CHECK: ld4.h { v0, v1, v2, v3 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i64 1, i16* %A)
  ret %struct.__neon_int16x8x4_t %tmp2
}

declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly

define %struct.__neon_int32x4x2_t @ld2lane_4s(<4 x i32> %L1, <4 x i32> %L2, i32* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld2lane_4s
; CHECK: ld2.s { v0, v1 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, i64 1, i32* %A)
  ret %struct.__neon_int32x4x2_t %tmp2
}

define %struct.__neon_int32x4x3_t @ld3lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i32* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld3lane_4s
; CHECK: ld3.s { v0, v1, v2 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 1, i32* %A)
  ret %struct.__neon_int32x4x3_t %tmp2
}

define %struct.__neon_int32x4x4_t @ld4lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i32* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld4lane_4s
; CHECK: ld4.s { v0, v1, v2, v3 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i64 1, i32* %A)
  ret %struct.__neon_int32x4x4_t %tmp2
}

declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly

define %struct.__neon_int64x2x2_t @ld2lane_2d(<2 x i64> %L1, <2 x i64> %L2, i64* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld2lane_2d
; CHECK: ld2.d { v0, v1 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, i64 1, i64* %A)
  ret %struct.__neon_int64x2x2_t %tmp2
}

define %struct.__neon_int64x2x3_t @ld3lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld3lane_2d
; CHECK: ld3.d { v0, v1, v2 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64 1, i64* %A)
  ret %struct.__neon_int64x2x3_t %tmp2
}

define %struct.__neon_int64x2x4_t @ld4lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld4lane_2d
; CHECK: ld4.d { v0, v1, v2, v3 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64 1, i64* %A)
  ret %struct.__neon_int64x2x4_t %tmp2
}

declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly

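; ld1r ("load one, replicate") loads a single element and broadcasts it to
; every lane of the destination, so each load-then-splat IR pattern below
; should select a single ld1r instruction.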
define <8 x i8> @ld1r_8b(i8* %bar) {
; CHECK: ld1r_8b
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.8b { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp1 = load i8, i8* %bar
  %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
  %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
  %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
  %tmp5 = insertelement <8 x i8> %tmp4, i8 %tmp1, i32 3
  %tmp6 = insertelement <8 x i8> %tmp5, i8 %tmp1, i32 4
  %tmp7 = insertelement <8 x i8> %tmp6, i8 %tmp1, i32 5
  %tmp8 = insertelement <8 x i8> %tmp7, i8 %tmp1, i32 6
  %tmp9 = insertelement <8 x i8> %tmp8, i8 %tmp1, i32 7
  ret <8 x i8> %tmp9
}

define <16 x i8> @ld1r_16b(i8* %bar) {
; CHECK: ld1r_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.16b { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp1 = load i8, i8* %bar
  %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
  %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
  %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
  %tmp5 = insertelement <16 x i8> %tmp4, i8 %tmp1, i32 3
  %tmp6 = insertelement <16 x i8> %tmp5, i8 %tmp1, i32 4
  %tmp7 = insertelement <16 x i8> %tmp6, i8 %tmp1, i32 5
  %tmp8 = insertelement <16 x i8> %tmp7, i8 %tmp1, i32 6
  %tmp9 = insertelement <16 x i8> %tmp8, i8 %tmp1, i32 7
  %tmp10 = insertelement <16 x i8> %tmp9, i8 %tmp1, i32 8
  %tmp11 = insertelement <16 x i8> %tmp10, i8 %tmp1, i32 9
  %tmp12 = insertelement <16 x i8> %tmp11, i8 %tmp1, i32 10
  %tmp13 = insertelement <16 x i8> %tmp12, i8 %tmp1, i32 11
  %tmp14 = insertelement <16 x i8> %tmp13, i8 %tmp1, i32 12
  %tmp15 = insertelement <16 x i8> %tmp14, i8 %tmp1, i32 13
  %tmp16 = insertelement <16 x i8> %tmp15, i8 %tmp1, i32 14
  %tmp17 = insertelement <16 x i8> %tmp16, i8 %tmp1, i32 15
  ret <16 x i8> %tmp17
}

define <4 x i16> @ld1r_4h(i16* %bar) {
; CHECK: ld1r_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.4h { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp1 = load i16, i16* %bar
  %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
  %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
  %tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
  %tmp5 = insertelement <4 x i16> %tmp4, i16 %tmp1, i32 3
  ret <4 x i16> %tmp5
}

define <8 x i16> @ld1r_8h(i16* %bar) {
; CHECK: ld1r_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.8h { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp1 = load i16, i16* %bar
  %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
  %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
  %tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
  %tmp5 = insertelement <8 x i16> %tmp4, i16 %tmp1, i32 3
  %tmp6 = insertelement <8 x i16> %tmp5, i16 %tmp1, i32 4
  %tmp7 = insertelement <8 x i16> %tmp6, i16 %tmp1, i32 5
  %tmp8 = insertelement <8 x i16> %tmp7, i16 %tmp1, i32 6
  %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 7
  ret <8 x i16> %tmp9
}

define <2 x i32> @ld1r_2s(i32* %bar) {
; CHECK: ld1r_2s
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2s { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp1 = load i32, i32* %bar
  %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
  %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
  ret <2 x i32> %tmp3
}

define <4 x i32> @ld1r_4s(i32* %bar) {
; CHECK: ld1r_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.4s { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp1 = load i32, i32* %bar
  %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
  %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
  %tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
  %tmp5 = insertelement <4 x i32> %tmp4, i32 %tmp1, i32 3
  ret <4 x i32> %tmp5
}

define <2 x i64> @ld1r_2d(i64* %bar) {
; CHECK: ld1r_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2d { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp1 = load i64, i64* %bar
  %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
  %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
  ret <2 x i64> %tmp3
}

define %struct.__neon_int8x8x2_t @ld2r_8b(i8* %A) nounwind {
; CHECK: ld2r_8b
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.8b { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A)
  ret %struct.__neon_int8x8x2_t %tmp2
}

define %struct.__neon_int8x8x3_t @ld3r_8b(i8* %A) nounwind {
; CHECK: ld3r_8b
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.8b { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A)
  ret %struct.__neon_int8x8x3_t %tmp2
}

define %struct.__neon_int8x8x4_t @ld4r_8b(i8* %A) nounwind {
; CHECK: ld4r_8b
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A)
  ret %struct.__neon_int8x8x4_t %tmp2
}

declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8*) nounwind readonly

define %struct.__neon_int8x16x2_t @ld2r_16b(i8* %A) nounwind {
; CHECK: ld2r_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.16b { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A)
  ret %struct.__neon_int8x16x2_t %tmp2
}

define %struct.__neon_int8x16x3_t @ld3r_16b(i8* %A) nounwind {
; CHECK: ld3r_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.16b { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A)
  ret %struct.__neon_int8x16x3_t %tmp2
}

define %struct.__neon_int8x16x4_t @ld4r_16b(i8* %A) nounwind {
; CHECK: ld4r_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A)
  ret %struct.__neon_int8x16x4_t %tmp2
}

declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8*) nounwind readonly

define %struct.__neon_int16x4x2_t @ld2r_4h(i16* %A) nounwind {
; CHECK: ld2r_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.4h { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A)
  ret %struct.__neon_int16x4x2_t %tmp2
}

define %struct.__neon_int16x4x3_t @ld3r_4h(i16* %A) nounwind {
; CHECK: ld3r_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.4h { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A)
  ret %struct.__neon_int16x4x3_t %tmp2
}

define %struct.__neon_int16x4x4_t @ld4r_4h(i16* %A) nounwind {
; CHECK: ld4r_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A)
  ret %struct.__neon_int16x4x4_t %tmp2
}

declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16*) nounwind readonly

define %struct.__neon_int16x8x2_t @ld2r_8h(i16* %A) nounwind {
; CHECK: ld2r_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.8h { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A)
  ret %struct.__neon_int16x8x2_t %tmp2
}

define %struct.__neon_int16x8x3_t @ld3r_8h(i16* %A) nounwind {
; CHECK: ld3r_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.8h { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A)
  ret %struct.__neon_int16x8x3_t %tmp2
}

define %struct.__neon_int16x8x4_t @ld4r_8h(i16* %A) nounwind {
; CHECK: ld4r_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A)
  ret %struct.__neon_int16x8x4_t %tmp2
}

declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16*) nounwind readonly

define %struct.__neon_int32x2x2_t @ld2r_2s(i32* %A) nounwind {
; CHECK: ld2r_2s
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.2s { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A)
  ret %struct.__neon_int32x2x2_t %tmp2
}

define %struct.__neon_int32x2x3_t @ld3r_2s(i32* %A) nounwind {
; CHECK: ld3r_2s
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.2s { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A)
  ret %struct.__neon_int32x2x3_t %tmp2
}

define %struct.__neon_int32x2x4_t @ld4r_2s(i32* %A) nounwind {
; CHECK: ld4r_2s
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A)
  ret %struct.__neon_int32x2x4_t %tmp2
}

declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32*) nounwind readonly

define %struct.__neon_int32x4x2_t @ld2r_4s(i32* %A) nounwind {
; CHECK: ld2r_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.4s { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A)
  ret %struct.__neon_int32x4x2_t %tmp2
}

define %struct.__neon_int32x4x3_t @ld3r_4s(i32* %A) nounwind {
; CHECK: ld3r_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.4s { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A)
  ret %struct.__neon_int32x4x3_t %tmp2
}

define %struct.__neon_int32x4x4_t @ld4r_4s(i32* %A) nounwind {
; CHECK: ld4r_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A)
  ret %struct.__neon_int32x4x4_t %tmp2
}

declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32*) nounwind readonly

define %struct.__neon_int64x1x2_t @ld2r_1d(i64* %A) nounwind {
; CHECK: ld2r_1d
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.1d { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A)
  ret %struct.__neon_int64x1x2_t %tmp2
}

define %struct.__neon_int64x1x3_t @ld3r_1d(i64* %A) nounwind {
; CHECK: ld3r_1d
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.1d { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A)
  ret %struct.__neon_int64x1x3_t %tmp2
}

define %struct.__neon_int64x1x4_t @ld4r_1d(i64* %A) nounwind {
; CHECK: ld4r_1d
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A)
  ret %struct.__neon_int64x1x4_t %tmp2
}

declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64*) nounwind readonly

define %struct.__neon_int64x2x2_t @ld2r_2d(i64* %A) nounwind {
; CHECK: ld2r_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.2d { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A)
  ret %struct.__neon_int64x2x2_t %tmp2
}

define %struct.__neon_int64x2x3_t @ld3r_2d(i64* %A) nounwind {
; CHECK: ld3r_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.2d { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A)
  ret %struct.__neon_int64x2x3_t %tmp2
}

define %struct.__neon_int64x2x4_t @ld4r_2d(i64* %A) nounwind {
; CHECK: ld4r_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A)
  ret %struct.__neon_int64x2x4_t %tmp2
}

declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64*) nounwind readonly

define <16 x i8> @ld1_16b(<16 x i8> %V, i8* %bar) {
; CHECK-LABEL: ld1_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.b { v0 }[0], [x0]
; CHECK-NEXT: ret
  %tmp1 = load i8, i8* %bar
  %tmp2 = insertelement <16 x i8> %V, i8 %tmp1, i32 0
  ret <16 x i8> %tmp2
}

define <8 x i16> @ld1_8h(<8 x i16> %V, i16* %bar) {
; CHECK-LABEL: ld1_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.h { v0 }[0], [x0]
; CHECK-NEXT: ret
  %tmp1 = load i16, i16* %bar
  %tmp2 = insertelement <8 x i16> %V, i16 %tmp1, i32 0
  ret <8 x i16> %tmp2
}

define <4 x i32> @ld1_4s(<4 x i32> %V, i32* %bar) {
; CHECK-LABEL: ld1_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.s { v0 }[0], [x0]
; CHECK-NEXT: ret
  %tmp1 = load i32, i32* %bar
  %tmp2 = insertelement <4 x i32> %V, i32 %tmp1, i32 0
  ret <4 x i32> %tmp2
}

define <4 x float> @ld1_4s_float(<4 x float> %V, float* %bar) {
; CHECK-LABEL: ld1_4s_float:
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.s { v0 }[0], [x0]
; CHECK-NEXT: ret
  %tmp1 = load float, float* %bar
  %tmp2 = insertelement <4 x float> %V, float %tmp1, i32 0
  ret <4 x float> %tmp2
}

define <2 x i64> @ld1_2d(<2 x i64> %V, i64* %bar) {
; CHECK-LABEL: ld1_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.d { v0 }[0], [x0]
; CHECK-NEXT: ret
  %tmp1 = load i64, i64* %bar
  %tmp2 = insertelement <2 x i64> %V, i64 %tmp1, i32 0
  ret <2 x i64> %tmp2
}

define <2 x double> @ld1_2d_double(<2 x double> %V, double* %bar) {
; CHECK-LABEL: ld1_2d_double:
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.d { v0 }[0], [x0]
; CHECK-NEXT: ret
  %tmp1 = load double, double* %bar
  %tmp2 = insertelement <2 x double> %V, double %tmp1, i32 0
  ret <2 x double> %tmp2
}

define <1 x i64> @ld1_1d(<1 x i64>* %p) {
; CHECK-LABEL: ld1_1d
; Make sure we are using the operands defined by the ABI
; CHECK: ldr [[REG:d[0-9]+]], [x0]
; CHECK-NEXT: ret
  %tmp = load <1 x i64>, <1 x i64>* %p, align 8
  ret <1 x i64> %tmp
}

define <8 x i8> @ld1_8b(<8 x i8> %V, i8* %bar) {
; CHECK-LABEL: ld1_8b
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.b { v0 }[0], [x0]
; CHECK-NEXT: ret
  %tmp1 = load i8, i8* %bar
  %tmp2 = insertelement <8 x i8> %V, i8 %tmp1, i32 0
  ret <8 x i8> %tmp2
}

define <4 x i16> @ld1_4h(<4 x i16> %V, i16* %bar) {
; CHECK-LABEL: ld1_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.h { v0 }[0], [x0]
; CHECK-NEXT: ret
  %tmp1 = load i16, i16* %bar
  %tmp2 = insertelement <4 x i16> %V, i16 %tmp1, i32 0
  ret <4 x i16> %tmp2
}

define <2 x i32> @ld1_2s(<2 x i32> %V, i32* %bar) {
; CHECK-LABEL: ld1_2s:
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.s { v0 }[0], [x0]
; CHECK-NEXT: ret
  %tmp1 = load i32, i32* %bar
  %tmp2 = insertelement <2 x i32> %V, i32 %tmp1, i32 0
  ret <2 x i32> %tmp2
}

define <2 x float> @ld1_2s_float(<2 x float> %V, float* %bar) {
; CHECK-LABEL: ld1_2s_float:
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.s { v0 }[0], [x0]
; CHECK-NEXT: ret
  %tmp1 = load float, float* %bar
  %tmp2 = insertelement <2 x float> %V, float %tmp1, i32 0
  ret <2 x float> %tmp2
}

; Test case for rdar://13098923: make sure vld1_dup_u32 generates ld1r.2s.
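; Roughly the C source behind this pattern (a hedged reconstruction assuming
; <arm_neon.h>; illustrative comment only, not part of the test):
;   uint8x8_t va = vreinterpret_u8_u32(vld1_dup_u32((const uint32_t *)a));
;   uint8x8_t vb = vreinterpret_u8_u32(vld1_dup_u32((const uint32_t *)b));
;   vst1_u16((uint16_t *)diff, vget_low_u16(vsubl_u8(va, vb)));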
define void @ld1r_2s_from_dup(i8* nocapture %a, i8* nocapture %b, i16* nocapture %diff) nounwind ssp {
entry:
; CHECK: ld1r_2s_from_dup
; CHECK: ld1r.2s { [[ARG1:v[0-9]+]] }, [x0]
; CHECK-NEXT: ld1r.2s { [[ARG2:v[0-9]+]] }, [x1]
; CHECK-NEXT: usubl.8h v[[RESREGNUM:[0-9]+]], [[ARG1]], [[ARG2]]
; CHECK-NEXT: str d[[RESREGNUM]], [x2]
; CHECK-NEXT: ret
  %tmp = bitcast i8* %a to i32*
  %tmp1 = load i32, i32* %tmp, align 4
  %tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0
  %lane = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer
  %tmp3 = bitcast <2 x i32> %lane to <8 x i8>
  %tmp4 = bitcast i8* %b to i32*
  %tmp5 = load i32, i32* %tmp4, align 4
  %tmp6 = insertelement <2 x i32> undef, i32 %tmp5, i32 0
  %lane1 = shufflevector <2 x i32> %tmp6, <2 x i32> undef, <2 x i32> zeroinitializer
  %tmp7 = bitcast <2 x i32> %lane1 to <8 x i8>
  %vmovl.i.i = zext <8 x i8> %tmp3 to <8 x i16>
  %vmovl.i4.i = zext <8 x i8> %tmp7 to <8 x i16>
  %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i4.i
  %tmp8 = bitcast <8 x i16> %sub.i to <2 x i64>
  %shuffle.i = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> zeroinitializer
  %tmp9 = bitcast <1 x i64> %shuffle.i to <4 x i16>
  %tmp10 = bitcast i16* %diff to <4 x i16>*
  store <4 x i16> %tmp9, <4 x i16>* %tmp10, align 8
  ret void
}

; Tests for rdar://11947069: vld1_dup_* and vld1q_dup_* code gen is suboptimal
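; For example, float32x4_t v = vld1q_dup_f32(x) (assuming <arm_neon.h>) should
; presumably compile to a single ld1r.4s rather than a scalar load followed by
; lane inserts or a dup.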
define <4 x float> @ld1r_4s_float(float* nocapture %x) {
entry:
; CHECK-LABEL: ld1r_4s_float
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.4s { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp = load float, float* %x, align 4
  %tmp1 = insertelement <4 x float> undef, float %tmp, i32 0
  %tmp2 = insertelement <4 x float> %tmp1, float %tmp, i32 1
  %tmp3 = insertelement <4 x float> %tmp2, float %tmp, i32 2
  %tmp4 = insertelement <4 x float> %tmp3, float %tmp, i32 3
  ret <4 x float> %tmp4
}

define <2 x float> @ld1r_2s_float(float* nocapture %x) {
entry:
; CHECK-LABEL: ld1r_2s_float
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2s { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp = load float, float* %x, align 4
  %tmp1 = insertelement <2 x float> undef, float %tmp, i32 0
  %tmp2 = insertelement <2 x float> %tmp1, float %tmp, i32 1
  ret <2 x float> %tmp2
}

define <2 x double> @ld1r_2d_double(double* nocapture %x) {
entry:
; CHECK-LABEL: ld1r_2d_double
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2d { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp = load double, double* %x, align 4
  %tmp1 = insertelement <2 x double> undef, double %tmp, i32 0
  %tmp2 = insertelement <2 x double> %tmp1, double %tmp, i32 1
  ret <2 x double> %tmp2
}

define <1 x double> @ld1r_1d_double(double* nocapture %x) {
entry:
; CHECK-LABEL: ld1r_1d_double
; Make sure we are using the operands defined by the ABI
; CHECK: ldr d0, [x0]
; CHECK-NEXT: ret
  %tmp = load double, double* %x, align 4
  %tmp1 = insertelement <1 x double> undef, double %tmp, i32 0
  ret <1 x double> %tmp1
}

define <4 x float> @ld1r_4s_float_shuff(float* nocapture %x) {
entry:
; CHECK-LABEL: ld1r_4s_float_shuff
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.4s { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp = load float, float* %x, align 4
  %tmp1 = insertelement <4 x float> undef, float %tmp, i32 0
  %lane = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %lane
}

define <2 x float> @ld1r_2s_float_shuff(float* nocapture %x) {
entry:
; CHECK-LABEL: ld1r_2s_float_shuff
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2s { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp = load float, float* %x, align 4
  %tmp1 = insertelement <2 x float> undef, float %tmp, i32 0
  %lane = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
  ret <2 x float> %lane
}

define <2 x double> @ld1r_2d_double_shuff(double* nocapture %x) {
entry:
; CHECK-LABEL: ld1r_2d_double_shuff
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2d { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp = load double, double* %x, align 4
  %tmp1 = insertelement <2 x double> undef, double %tmp, i32 0
  %lane = shufflevector <2 x double> %tmp1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %lane
}

define <1 x double> @ld1r_1d_double_shuff(double* nocapture %x) {
entry:
; CHECK-LABEL: ld1r_1d_double_shuff
; Make sure we are using the operands defined by the ABI
; CHECK: ldr d0, [x0]
; CHECK-NEXT: ret
  %tmp = load double, double* %x, align 4
  %tmp1 = insertelement <1 x double> undef, double %tmp, i32 0
  %lane = shufflevector <1 x double> %tmp1, <1 x double> undef, <1 x i32> zeroinitializer
  ret <1 x double> %lane
}

%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }
%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }
%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }

declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float*) nounwind readonly
declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double*) nounwind readonly

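; Unlike ld2, the ld1x2 intrinsics load two whole vectors from consecutive
; memory without any de-interleaving; they should still select the
; multi-register ld1 forms checked below.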
   1051 define %struct.__neon_int8x8x2_t @ld1_x2_v8i8(i8* %addr) {
   1052 ; CHECK-LABEL: ld1_x2_v8i8:
   1053 ; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1054   %val = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %addr)
   1055   ret %struct.__neon_int8x8x2_t %val
   1056 }
   1057 
   1058 define %struct.__neon_int16x4x2_t @ld1_x2_v4i16(i16* %addr) {
   1059 ; CHECK-LABEL: ld1_x2_v4i16:
   1060 ; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1061   %val = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %addr)
   1062   ret %struct.__neon_int16x4x2_t %val
   1063 }
   1064 
   1065 define %struct.__neon_int32x2x2_t @ld1_x2_v2i32(i32* %addr) {
   1066 ; CHECK-LABEL: ld1_x2_v2i32:
   1067 ; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1068   %val = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %addr)
   1069   ret %struct.__neon_int32x2x2_t %val
   1070 }
   1071 
   1072 define %struct.__neon_float32x2x2_t @ld1_x2_v2f32(float* %addr) {
   1073 ; CHECK-LABEL: ld1_x2_v2f32:
   1074 ; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1075   %val = call %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %addr)
   1076   ret %struct.__neon_float32x2x2_t %val
   1077 }
   1078 
   1079 define %struct.__neon_int64x1x2_t @ld1_x2_v1i64(i64* %addr) {
   1080 ; CHECK-LABEL: ld1_x2_v1i64:
   1081 ; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1082   %val = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %addr)
   1083   ret %struct.__neon_int64x1x2_t %val
   1084 }
   1085 
   1086 define %struct.__neon_float64x1x2_t @ld1_x2_v1f64(double* %addr) {
   1087 ; CHECK-LABEL: ld1_x2_v1f64:
   1088 ; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1089   %val = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %addr)
   1090   ret %struct.__neon_float64x1x2_t %val
   1091 }
   1092 
   1093 
   1094 %struct.__neon_float32x4x2_t = type { <4 x float>,  <4 x float> }
   1095 %struct.__neon_float32x4x3_t = type { <4 x float>,  <4 x float>,  <4 x float> }
   1096 %struct.__neon_float32x4x4_t = type { <4 x float>,  <4 x float>, <4 x float>,  <4 x float> }
   1097 
   1098 %struct.__neon_float64x2x2_t = type { <2 x double>,  <2 x double> }
   1099 %struct.__neon_float64x2x3_t = type { <2 x double>,  <2 x double>,  <2 x double> }
   1100 %struct.__neon_float64x2x4_t = type { <2 x double>,  <2 x double>, <2 x double>,  <2 x double> }
   1101 
declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_float32x4x2_t @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float*) nounwind readonly
declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_float64x2x2_t @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double*) nounwind readonly

define %struct.__neon_int8x16x2_t @ld1_x2_v16i8(i8* %addr) {
; CHECK-LABEL: ld1_x2_v16i8:
; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %addr)
  ret %struct.__neon_int8x16x2_t %val
}

define %struct.__neon_int16x8x2_t @ld1_x2_v8i16(i16* %addr) {
; CHECK-LABEL: ld1_x2_v8i16:
; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %addr)
  ret %struct.__neon_int16x8x2_t %val
}

define %struct.__neon_int32x4x2_t @ld1_x2_v4i32(i32* %addr) {
; CHECK-LABEL: ld1_x2_v4i32:
; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %addr)
  ret %struct.__neon_int32x4x2_t %val
}

define %struct.__neon_float32x4x2_t @ld1_x2_v4f32(float* %addr) {
; CHECK-LABEL: ld1_x2_v4f32:
; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float32x4x2_t @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %addr)
  ret %struct.__neon_float32x4x2_t %val
}

define %struct.__neon_int64x2x2_t @ld1_x2_v2i64(i64* %addr) {
; CHECK-LABEL: ld1_x2_v2i64:
; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %addr)
  ret %struct.__neon_int64x2x2_t %val
}

define %struct.__neon_float64x2x2_t @ld1_x2_v2f64(double* %addr) {
; CHECK-LABEL: ld1_x2_v2f64:
; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float64x2x2_t @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %addr)
  ret %struct.__neon_float64x2x2_t %val
}

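; ld1x3 on 64-bit (D) vectors: expect a three-register ld1 from the base address in x0.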
declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float*) nounwind readonly
declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double*) nounwind readonly

define %struct.__neon_int8x8x3_t @ld1_x3_v8i8(i8* %addr) {
; CHECK-LABEL: ld1_x3_v8i8:
; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %addr)
  ret %struct.__neon_int8x8x3_t %val
}

define %struct.__neon_int16x4x3_t @ld1_x3_v4i16(i16* %addr) {
; CHECK-LABEL: ld1_x3_v4i16:
; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %addr)
  ret %struct.__neon_int16x4x3_t %val
}

define %struct.__neon_int32x2x3_t @ld1_x3_v2i32(i32* %addr) {
; CHECK-LABEL: ld1_x3_v2i32:
; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %addr)
  ret %struct.__neon_int32x2x3_t %val
}

define %struct.__neon_float32x2x3_t @ld1_x3_v2f32(float* %addr) {
; CHECK-LABEL: ld1_x3_v2f32:
; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %addr)
  ret %struct.__neon_float32x2x3_t %val
}

define %struct.__neon_int64x1x3_t @ld1_x3_v1i64(i64* %addr) {
; CHECK-LABEL: ld1_x3_v1i64:
; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %addr)
  ret %struct.__neon_int64x1x3_t %val
}

define %struct.__neon_float64x1x3_t @ld1_x3_v1f64(double* %addr) {
; CHECK-LABEL: ld1_x3_v1f64:
; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %addr)
  ret %struct.__neon_float64x1x3_t %val
}

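; ld1x3 on 128-bit (Q) vectors: expect a three-register ld1 from the base address in x0.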
declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_float32x4x3_t @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float*) nounwind readonly
declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_float64x2x3_t @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double*) nounwind readonly

define %struct.__neon_int8x16x3_t @ld1_x3_v16i8(i8* %addr) {
; CHECK-LABEL: ld1_x3_v16i8:
; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %addr)
  ret %struct.__neon_int8x16x3_t %val
}

define %struct.__neon_int16x8x3_t @ld1_x3_v8i16(i16* %addr) {
; CHECK-LABEL: ld1_x3_v8i16:
; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %addr)
  ret %struct.__neon_int16x8x3_t %val
}

define %struct.__neon_int32x4x3_t @ld1_x3_v4i32(i32* %addr) {
; CHECK-LABEL: ld1_x3_v4i32:
; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %addr)
  ret %struct.__neon_int32x4x3_t %val
}

define %struct.__neon_float32x4x3_t @ld1_x3_v4f32(float* %addr) {
; CHECK-LABEL: ld1_x3_v4f32:
; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float32x4x3_t @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %addr)
  ret %struct.__neon_float32x4x3_t %val
}

define %struct.__neon_int64x2x3_t @ld1_x3_v2i64(i64* %addr) {
; CHECK-LABEL: ld1_x3_v2i64:
; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %addr)
  ret %struct.__neon_int64x2x3_t %val
}

define %struct.__neon_float64x2x3_t @ld1_x3_v2f64(double* %addr) {
; CHECK-LABEL: ld1_x3_v2f64:
; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float64x2x3_t @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %addr)
  ret %struct.__neon_float64x2x3_t %val
}

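; ld1x4 on 64-bit (D) vectors: expect a four-register ld1 from the base address in x0.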
declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float*) nounwind readonly
declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double*) nounwind readonly

define %struct.__neon_int8x8x4_t @ld1_x4_v8i8(i8* %addr) {
; CHECK-LABEL: ld1_x4_v8i8:
; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %addr)
  ret %struct.__neon_int8x8x4_t %val
}

define %struct.__neon_int16x4x4_t @ld1_x4_v4i16(i16* %addr) {
; CHECK-LABEL: ld1_x4_v4i16:
; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %addr)
  ret %struct.__neon_int16x4x4_t %val
}

define %struct.__neon_int32x2x4_t @ld1_x4_v2i32(i32* %addr) {
; CHECK-LABEL: ld1_x4_v2i32:
; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %addr)
  ret %struct.__neon_int32x2x4_t %val
}

define %struct.__neon_float32x2x4_t @ld1_x4_v2f32(float* %addr) {
; CHECK-LABEL: ld1_x4_v2f32:
; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %addr)
  ret %struct.__neon_float32x2x4_t %val
}

define %struct.__neon_int64x1x4_t @ld1_x4_v1i64(i64* %addr) {
; CHECK-LABEL: ld1_x4_v1i64:
; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %addr)
  ret %struct.__neon_int64x1x4_t %val
}

define %struct.__neon_float64x1x4_t @ld1_x4_v1f64(double* %addr) {
; CHECK-LABEL: ld1_x4_v1f64:
; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %addr)
  ret %struct.__neon_float64x1x4_t %val
}

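; ld1x4 on 128-bit (Q) vectors: expect a four-register ld1 from the base address in x0.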
declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_float32x4x4_t @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float*) nounwind readonly
declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double*) nounwind readonly

define %struct.__neon_int8x16x4_t @ld1_x4_v16i8(i8* %addr) {
; CHECK-LABEL: ld1_x4_v16i8:
; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %addr)
  ret %struct.__neon_int8x16x4_t %val
}

define %struct.__neon_int16x8x4_t @ld1_x4_v8i16(i16* %addr) {
; CHECK-LABEL: ld1_x4_v8i16:
; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %addr)
  ret %struct.__neon_int16x8x4_t %val
}

define %struct.__neon_int32x4x4_t @ld1_x4_v4i32(i32* %addr) {
; CHECK-LABEL: ld1_x4_v4i32:
; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %addr)
  ret %struct.__neon_int32x4x4_t %val
}

define %struct.__neon_float32x4x4_t @ld1_x4_v4f32(float* %addr) {
; CHECK-LABEL: ld1_x4_v4f32:
; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float32x4x4_t @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %addr)
  ret %struct.__neon_float32x4x4_t %val
}

define %struct.__neon_int64x2x4_t @ld1_x4_v2i64(i64* %addr) {
; CHECK-LABEL: ld1_x4_v2i64:
; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %addr)
  ret %struct.__neon_int64x2x4_t %val
}

define %struct.__neon_float64x2x4_t @ld1_x4_v2f64(double* %addr) {
; CHECK-LABEL: ld1_x4_v2f64:
; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %addr)
  ret %struct.__neon_float64x2x4_t %val
}