Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
      2 
      3 
      4 %struct.uint8x16x2_t = type { [2 x <16 x i8>] } ; each %struct.* type here is a one-field struct wrapping an [N x <vector>] array
      5 %struct.poly8x16x2_t = type { [2 x <16 x i8>] }
      6 %struct.uint8x16x3_t = type { [3 x <16 x i8>] }
        ; Two-vector aggregates: the 128-bit vector forms first, then the 64-bit forms.
        ; NOTE(review): none of the functions visible in this section reference these types.
      7 %struct.int8x16x2_t = type { [2 x <16 x i8>] }
      8 %struct.int16x8x2_t = type { [2 x <8 x i16>] }
      9 %struct.int32x4x2_t = type { [2 x <4 x i32>] }
     10 %struct.int64x2x2_t = type { [2 x <2 x i64>] }
     11 %struct.float32x4x2_t = type { [2 x <4 x float>] }
     12 %struct.float64x2x2_t = type { [2 x <2 x double>] }
     13 %struct.int8x8x2_t = type { [2 x <8 x i8>] }
     14 %struct.int16x4x2_t = type { [2 x <4 x i16>] }
     15 %struct.int32x2x2_t = type { [2 x <2 x i32>] }
     16 %struct.int64x1x2_t = type { [2 x <1 x i64>] }
     17 %struct.float32x2x2_t = type { [2 x <2 x float>] }
     18 %struct.float64x1x2_t = type { [2 x <1 x double>] }
        ; Three-vector aggregates, in the same element-type order.
     19 %struct.int8x16x3_t = type { [3 x <16 x i8>] }
     20 %struct.int16x8x3_t = type { [3 x <8 x i16>] }
     21 %struct.int32x4x3_t = type { [3 x <4 x i32>] }
     22 %struct.int64x2x3_t = type { [3 x <2 x i64>] }
     23 %struct.float32x4x3_t = type { [3 x <4 x float>] }
     24 %struct.float64x2x3_t = type { [3 x <2 x double>] }
     25 %struct.int8x8x3_t = type { [3 x <8 x i8>] }
     26 %struct.int16x4x3_t = type { [3 x <4 x i16>] }
     27 %struct.int32x2x3_t = type { [3 x <2 x i32>] }
     28 %struct.int64x1x3_t = type { [3 x <1 x i64>] }
     29 %struct.float32x2x3_t = type { [3 x <2 x float>] }
     30 %struct.float64x1x3_t = type { [3 x <1 x double>] }
        ; Four-vector aggregates, in the same element-type order.
     31 %struct.int8x16x4_t = type { [4 x <16 x i8>] }
     32 %struct.int16x8x4_t = type { [4 x <8 x i16>] }
     33 %struct.int32x4x4_t = type { [4 x <4 x i32>] }
     34 %struct.int64x2x4_t = type { [4 x <2 x i64>] }
     35 %struct.float32x4x4_t = type { [4 x <4 x float>] }
     36 %struct.float64x2x4_t = type { [4 x <2 x double>] }
     37 %struct.int8x8x4_t = type { [4 x <8 x i8>] }
     38 %struct.int16x4x4_t = type { [4 x <4 x i16>] }
     39 %struct.int32x2x4_t = type { [4 x <2 x i32>] }
     40 %struct.int64x1x4_t = type { [4 x <1 x i64>] }
     41 %struct.float32x2x4_t = type { [4 x <2 x float>] }
     42 %struct.float64x1x4_t = type { [4 x <1 x double>] }
     43 
     44 define <16 x i8> @test_ld_from_poll_v16i8(<16 x i8> %a) {
        ; Adds the non-splat constant <1, 2, ..., 16> to %a.
        ; Expected: adrp of a dot-prefixed symbol, then an ldr into a q register that
        ; uses a :lo12: reference to a dot-prefixed symbol.
     45 ; CHECK-LABEL: test_ld_from_poll_v16i8:
     46 ; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
     47 ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
     48 entry:
     49   %sum = add <16 x i8> %a, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>
     50   ret <16 x i8> %sum
     51 }
     52 
     53 define <8 x i16> @test_ld_from_poll_v8i16(<8 x i16> %a) {
        ; Adds the non-splat constant <1..8> to %a.
        ; Expected: adrp of a dot-prefixed symbol, then a :lo12: ldr into a q register.
     54 ; CHECK-LABEL: test_ld_from_poll_v8i16:
     55 ; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
     56 ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
     57 entry:
     58   %sum = add <8 x i16> %a, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
     59   ret <8 x i16> %sum
     60 }
     61 
     62 define <4 x i32> @test_ld_from_poll_v4i32(<4 x i32> %a) {
        ; Adds the non-splat constant <1, 2, 3, 4> to %a.
        ; Expected: adrp of a dot-prefixed symbol, then a :lo12: ldr into a q register.
     63 ; CHECK-LABEL: test_ld_from_poll_v4i32:
     64 ; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
     65 ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
     66 entry:
     67   %sum = add <4 x i32> %a, <i32 1, i32 2, i32 3, i32 4>
     68   ret <4 x i32> %sum
     69 }
     70 
     71 define <2 x i64> @test_ld_from_poll_v2i64(<2 x i64> %a) {
        ; Adds the non-splat constant <1, 2> to %a.
        ; Expected: adrp of a dot-prefixed symbol, then a :lo12: ldr into a q register.
     72 ; CHECK-LABEL: test_ld_from_poll_v2i64:
     73 ; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
     74 ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
     75 entry:
     76   %sum = add <2 x i64> %a, <i64 1, i64 2>
     77   ret <2 x i64> %sum
     78 }
     79 
     80 define <4 x float> @test_ld_from_poll_v4f32(<4 x float> %a) {
        ; Floating-point add of the non-splat constant <1.0, 2.0, 3.0, 4.0> to %a.
        ; Expected: adrp of a dot-prefixed symbol, then a :lo12: ldr into a q register.
     81 ; CHECK-LABEL: test_ld_from_poll_v4f32:
     82 ; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
     83 ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
     84 entry:
     85   %sum = fadd <4 x float> %a, <float 1.0, float 2.0, float 3.0, float 4.0>
     86   ret <4 x float> %sum
     87 }
     88 
     89 define <2 x double> @test_ld_from_poll_v2f64(<2 x double> %a) {
        ; Floating-point add of the non-splat constant <1.0, 2.0> to %a.
        ; Expected: adrp of a dot-prefixed symbol, then a :lo12: ldr into a q register.
     90 ; CHECK-LABEL: test_ld_from_poll_v2f64:
     91 ; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
     92 ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
     93 entry:
     94   %sum = fadd <2 x double> %a, <double 1.0, double 2.0>
     95   ret <2 x double> %sum
     96 }
     97 
     98 define <8 x i8> @test_ld_from_poll_v8i8(<8 x i8> %a) {
        ; 64-bit vector case: adds the non-splat constant <1..8> to %a.
        ; Expected: adrp of a dot-prefixed symbol, then a :lo12: ldr into a d register.
     99 ; CHECK-LABEL: test_ld_from_poll_v8i8:
    100 ; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
    101 ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
    102 entry:
    103   %sum = add <8 x i8> %a, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>
    104   ret <8 x i8> %sum
    105 }
    106 
    107 define <4 x i16> @test_ld_from_poll_v4i16(<4 x i16> %a) {
        ; 64-bit vector case: adds the non-splat constant <1, 2, 3, 4> to %a.
        ; Expected: adrp of a dot-prefixed symbol, then a :lo12: ldr into a d register.
    108 ; CHECK-LABEL: test_ld_from_poll_v4i16:
    109 ; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
    110 ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
    111 entry:
    112   %sum = add <4 x i16> %a, <i16 1, i16 2, i16 3, i16 4>
    113   ret <4 x i16> %sum
    114 }
    115 
    116 define <2 x i32> @test_ld_from_poll_v2i32(<2 x i32> %a) {
        ; 64-bit vector case: adds the non-splat constant <1, 2> to %a.
        ; Expected: adrp of a dot-prefixed symbol, then a :lo12: ldr into a d register.
    117 ; CHECK-LABEL: test_ld_from_poll_v2i32:
    118 ; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
    119 ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
    120 entry:
    121   %sum = add <2 x i32> %a, <i32 1, i32 2>
    122   ret <2 x i32> %sum
    123 }
    124 
    125 define <16 x i8> @test_vld1q_dup_s8(i8* %a) {
        ; Broadcast load: one i8 is read from %a, placed in lane 0 and replicated to all
        ; 16 lanes by an all-zero shuffle mask. Expected: ld1r with .16b from [x0].
    126 ; CHECK-LABEL: test_vld1q_dup_s8:
    127 ; CHECK: ld1r {{{ ?v[0-9]+.16b ?}}}, [x0]
    128 entry:
    129   %scalar = load i8, i8* %a, align 1
    130   %seed = insertelement <16 x i8> undef, i8 %scalar, i32 0
    131   %splat = shufflevector <16 x i8> %seed, <16 x i8> undef, <16 x i32> zeroinitializer
    132   ret <16 x i8> %splat
    133 }
    134 
    135 define <8 x i16> @test_vld1q_dup_s16(i16* %a) {
        ; Broadcast load: one i16 is read from %a, placed in lane 0 and replicated to all
        ; 8 lanes by an all-zero shuffle mask. Expected: ld1r with .8h from [x0].
    136 ; CHECK-LABEL: test_vld1q_dup_s16:
    137 ; CHECK: ld1r {{{ ?v[0-9]+.8h ?}}}, [x0]
    138 entry:
    139   %scalar = load i16, i16* %a, align 2
    140   %seed = insertelement <8 x i16> undef, i16 %scalar, i32 0
    141   %splat = shufflevector <8 x i16> %seed, <8 x i16> undef, <8 x i32> zeroinitializer
    142   ret <8 x i16> %splat
    143 }
    144 
    145 define <4 x i32> @test_vld1q_dup_s32(i32* %a) {
        ; Broadcast load: one i32 is read from %a, placed in lane 0 and replicated to all
        ; 4 lanes by an all-zero shuffle mask. Expected: ld1r with .4s from [x0].
    146 ; CHECK-LABEL: test_vld1q_dup_s32:
    147 ; CHECK: ld1r {{{ ?v[0-9]+.4s ?}}}, [x0]
    148 entry:
    149   %scalar = load i32, i32* %a, align 4
    150   %seed = insertelement <4 x i32> undef, i32 %scalar, i32 0
    151   %splat = shufflevector <4 x i32> %seed, <4 x i32> undef, <4 x i32> zeroinitializer
    152   ret <4 x i32> %splat
    153 }
    154 
    155 define <2 x i64> @test_vld1q_dup_s64(i64* %a) {
        ; Broadcast load: one i64 is read from %a, placed in lane 0 and replicated to both
        ; lanes by an all-zero shuffle mask. Expected: ld1r with .2d from [x0].
    156 ; CHECK-LABEL: test_vld1q_dup_s64:
    157 ; CHECK: ld1r {{{ ?v[0-9]+.2d ?}}}, [x0]
    158 entry:
    159   %scalar = load i64, i64* %a, align 8
    160   %seed = insertelement <2 x i64> undef, i64 %scalar, i32 0
    161   %splat = shufflevector <2 x i64> %seed, <2 x i64> undef, <2 x i32> zeroinitializer
    162   ret <2 x i64> %splat
    163 }
    164 
    165 define <4 x float> @test_vld1q_dup_f32(float* %a) {
        ; Broadcast load: one float is read from %a, placed in lane 0 and replicated to all
        ; 4 lanes by an all-zero shuffle mask. Expected: ld1r with .4s from [x0].
    166 ; CHECK-LABEL: test_vld1q_dup_f32:
    167 ; CHECK: ld1r {{{ ?v[0-9]+.4s ?}}}, [x0]
    168 entry:
    169   %scalar = load float, float* %a, align 4
    170   %seed = insertelement <4 x float> undef, float %scalar, i32 0
    171   %splat = shufflevector <4 x float> %seed, <4 x float> undef, <4 x i32> zeroinitializer
    172   ret <4 x float> %splat
    173 }
    174 
    175 define <2 x double> @test_vld1q_dup_f64(double* %a) {
        ; Broadcast load: one double is read from %a, placed in lane 0 and replicated to
        ; both lanes by an all-zero shuffle mask. Expected: ld1r with .2d from [x0].
    176 ; CHECK-LABEL: test_vld1q_dup_f64:
    177 ; CHECK: ld1r {{{ ?v[0-9]+.2d ?}}}, [x0]
    178 entry:
    179   %scalar = load double, double* %a, align 8
    180   %seed = insertelement <2 x double> undef, double %scalar, i32 0
    181   %splat = shufflevector <2 x double> %seed, <2 x double> undef, <2 x i32> zeroinitializer
    182   ret <2 x double> %splat
    183 }
    184 
    185 define <8 x i8> @test_vld1_dup_s8(i8* %a) {
        ; Broadcast load into a 64-bit vector: one i8 from %a is replicated to all 8 lanes
        ; by an all-zero shuffle mask. Expected: ld1r with .8b from [x0].
    186 ; CHECK-LABEL: test_vld1_dup_s8:
    187 ; CHECK: ld1r {{{ ?v[0-9]+.8b ?}}}, [x0]
    188 entry:
    189   %scalar = load i8, i8* %a, align 1
    190   %seed = insertelement <8 x i8> undef, i8 %scalar, i32 0
    191   %splat = shufflevector <8 x i8> %seed, <8 x i8> undef, <8 x i32> zeroinitializer
    192   ret <8 x i8> %splat
    193 }
    194 
    195 define <4 x i16> @test_vld1_dup_s16(i16* %a) {
        ; Broadcast load into a 64-bit vector: one i16 from %a is replicated to all 4 lanes
        ; by an all-zero shuffle mask. Expected: ld1r with .4h from [x0].
    196 ; CHECK-LABEL: test_vld1_dup_s16:
    197 ; CHECK: ld1r {{{ ?v[0-9]+.4h ?}}}, [x0]
    198 entry:
    199   %scalar = load i16, i16* %a, align 2
    200   %seed = insertelement <4 x i16> undef, i16 %scalar, i32 0
    201   %splat = shufflevector <4 x i16> %seed, <4 x i16> undef, <4 x i32> zeroinitializer
    202   ret <4 x i16> %splat
    203 }
    204 
    205 define <2 x i32> @test_vld1_dup_s32(i32* %a) {
        ; Broadcast load into a 64-bit vector: one i32 from %a is replicated to both lanes
        ; by an all-zero shuffle mask. Expected: ld1r with .2s from [x0].
    206 ; CHECK-LABEL: test_vld1_dup_s32:
    207 ; CHECK: ld1r {{{ ?v[0-9]+.2s ?}}}, [x0]
    208 entry:
    209   %scalar = load i32, i32* %a, align 4
    210   %seed = insertelement <2 x i32> undef, i32 %scalar, i32 0
    211   %splat = shufflevector <2 x i32> %seed, <2 x i32> undef, <2 x i32> zeroinitializer
    212   ret <2 x i32> %splat
    213 }
    214 
    215 define <1 x i64> @test_vld1_dup_s64(i64* %a) {
        ; One-lane vector: the loaded i64 is placed in lane 0 and returned, with no shuffle.
        ; Expected: a plain ldr into a d register from [x0].
    216 ; CHECK-LABEL: test_vld1_dup_s64:
    217 ; CHECK: ldr {{d[0-9]+}}, [x0]
    218 entry:
    219   %scalar = load i64, i64* %a, align 8
    220   %vec = insertelement <1 x i64> undef, i64 %scalar, i32 0
    221   ret <1 x i64> %vec
    222 }
    223 
    224 define <2 x float> @test_vld1_dup_f32(float* %a) {
        ; Broadcast load into a 64-bit vector: one float from %a is replicated to both lanes
        ; by an all-zero shuffle mask. Expected: ld1r with .2s from [x0].
    225 ; CHECK-LABEL: test_vld1_dup_f32:
    226 ; CHECK: ld1r {{{ ?v[0-9]+.2s ?}}}, [x0]
    227 entry:
    228   %scalar = load float, float* %a, align 4
    229   %seed = insertelement <2 x float> undef, float %scalar, i32 0
    230   %splat = shufflevector <2 x float> %seed, <2 x float> undef, <2 x i32> zeroinitializer
    231   ret <2 x float> %splat
    232 }
    233 
    234 define <1 x double> @test_vld1_dup_f64(double* %a) {
        ; One-lane vector: the loaded double is placed in lane 0 and returned, with no shuffle.
        ; Expected: a plain ldr into a d register from [x0].
    235 ; CHECK-LABEL: test_vld1_dup_f64:
    236 ; CHECK: ldr {{d[0-9]+}}, [x0]
    237 entry:
    238   %scalar = load double, double* %a, align 8
    239   %vec = insertelement <1 x double> undef, double %scalar, i32 0
    240   ret <1 x double> %vec
    241 }
    242 
    243 define <1 x i64> @testDUP.v1i64(i64* %a, i64* %b) #0 {
    244 ; The loaded i64 %val is also stored to %b, so the LD1R pattern can't be selected.
    245 ; Expected: ldr into an x register, then fmov to a d register and str of an x register (either order).
    246 ; CHECK-LABEL: testDUP.v1i64:
    247 ; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}]
    248 ; CHECK-DAG: fmov {{d[0-9]+}}, {{x[0-9]+}}
    249 ; CHECK-DAG: str {{x[0-9]+}}, [{{x[0-9]+}}]
    250   %val = load i64, i64* %a, align 8
    251   store i64 %val, i64* %b, align 8
    252   %vec = insertelement <1 x i64> undef, i64 %val, i32 0
    253   ret <1 x i64> %vec
    254 }
    255 
    256 define <1 x double> @testDUP.v1f64(double* %a, double* %b) #0 {
    257 ; The loaded double %val is also stored to %b, so the LD1R pattern can't be selected.
    258 ; Expected: ldr into a d register, then str from a d register; unlike the i64 variant, no fmov is looked for.
    259 ; CHECK-LABEL: testDUP.v1f64:
    260 ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}]
    261 ; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}]
    262   %val = load double, double* %a, align 8
    263   store double %val, double* %b, align 8
    264   %vec = insertelement <1 x double> undef, double %val, i32 0
    265   ret <1 x double> %vec
    266 }
    267 
    268 define <16 x i8> @test_vld1q_lane_s8(i8* %a, <16 x i8> %b) {
        ; Loads one i8 from %a and writes it into lane 15 of %b.
        ; Expected: single-element ld1 on a .b lane from [x0]; the lane number is not pinned.
    269 ; CHECK-LABEL: test_vld1q_lane_s8:
    270 ; CHECK: ld1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
    271 entry:
    272   %elt = load i8, i8* %a, align 1
    273   %merged = insertelement <16 x i8> %b, i8 %elt, i32 15
    274   ret <16 x i8> %merged
    275 }
    276 
    277 define <8 x i16> @test_vld1q_lane_s16(i16* %a, <8 x i16> %b) {
        ; Loads one i16 from %a and writes it into lane 7 of %b.
        ; Expected: single-element ld1 on a .h lane from [x0]; the lane number is not pinned.
    278 ; CHECK-LABEL: test_vld1q_lane_s16:
    279 ; CHECK: ld1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
    280 entry:
    281   %elt = load i16, i16* %a, align 2
    282   %merged = insertelement <8 x i16> %b, i16 %elt, i32 7
    283   ret <8 x i16> %merged
    284 }
    285 
    286 define <4 x i32> @test_vld1q_lane_s32(i32* %a, <4 x i32> %b) {
        ; Loads one i32 from %a and writes it into lane 3 of %b.
        ; Expected: single-element ld1 on a .s lane from [x0]; the lane number is not pinned.
    287 ; CHECK-LABEL: test_vld1q_lane_s32:
    288 ; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
    289 entry:
    290   %elt = load i32, i32* %a, align 4
    291   %merged = insertelement <4 x i32> %b, i32 %elt, i32 3
    292   ret <4 x i32> %merged
    293 }
    294 
    295 define <2 x i64> @test_vld1q_lane_s64(i64* %a, <2 x i64> %b) {
        ; Loads one i64 from %a and writes it into lane 1 of %b.
        ; Expected: single-element ld1 on a .d lane from [x0]; the lane number is not pinned.
    296 ; CHECK-LABEL: test_vld1q_lane_s64:
    297 ; CHECK: ld1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
    298 entry:
    299   %elt = load i64, i64* %a, align 8
    300   %merged = insertelement <2 x i64> %b, i64 %elt, i32 1
    301   ret <2 x i64> %merged
    302 }
    303 
    304 define <4 x float> @test_vld1q_lane_f32(float* %a, <4 x float> %b) {
        ; Loads one float from %a and writes it into lane 3 of %b.
        ; Expected: single-element ld1 on a .s lane from [x0]; the lane number is not pinned.
    305 ; CHECK-LABEL: test_vld1q_lane_f32:
    306 ; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
    307 entry:
    308   %elt = load float, float* %a, align 4
    309   %merged = insertelement <4 x float> %b, float %elt, i32 3
    310   ret <4 x float> %merged
    311 }
    312 
    313 define <2 x double> @test_vld1q_lane_f64(double* %a, <2 x double> %b) {
        ; Loads one double from %a and writes it into lane 1 of %b.
        ; Expected: single-element ld1 on a .d lane from [x0]; the lane number is not pinned.
    314 ; CHECK-LABEL: test_vld1q_lane_f64:
    315 ; CHECK: ld1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
    316 entry:
    317   %elt = load double, double* %a, align 8
    318   %merged = insertelement <2 x double> %b, double %elt, i32 1
    319   ret <2 x double> %merged
    320 }
    321 
    322 define <8 x i8> @test_vld1_lane_s8(i8* %a, <8 x i8> %b) {
        ; 64-bit vector case: loads one i8 from %a and writes it into lane 7 of %b.
        ; Expected: single-element ld1 on a .b lane from [x0]; the lane number is not pinned.
    323 ; CHECK-LABEL: test_vld1_lane_s8:
    324 ; CHECK: ld1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
    325 entry:
    326   %elt = load i8, i8* %a, align 1
    327   %merged = insertelement <8 x i8> %b, i8 %elt, i32 7
    328   ret <8 x i8> %merged
    329 }
    330 
    331 define <4 x i16> @test_vld1_lane_s16(i16* %a, <4 x i16> %b) {
        ; 64-bit vector case: loads one i16 from %a and writes it into lane 3 of %b.
        ; Expected: single-element ld1 on a .h lane from [x0]; the lane number is not pinned.
    332 ; CHECK-LABEL: test_vld1_lane_s16:
    333 ; CHECK: ld1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
    334 entry:
    335   %elt = load i16, i16* %a, align 2
    336   %merged = insertelement <4 x i16> %b, i16 %elt, i32 3
    337   ret <4 x i16> %merged
    338 }
    339 
    340 define <2 x i32> @test_vld1_lane_s32(i32* %a, <2 x i32> %b) {
        ; 64-bit vector case: loads one i32 from %a and writes it into lane 1 of %b.
        ; Expected: single-element ld1 on a .s lane from [x0]; the lane number is not pinned.
    341 ; CHECK-LABEL: test_vld1_lane_s32:
    342 ; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
    343 entry:
    344   %elt = load i32, i32* %a, align 4
    345   %merged = insertelement <2 x i32> %b, i32 %elt, i32 1
    346   ret <2 x i32> %merged
    347 }
    348 
    349 define <1 x i64> @test_vld1_lane_s64(i64* %a, <1 x i64> %b) {
        ; One-lane vector: the loaded i64 is inserted into lane 0 of undef; %b is not used
        ; by the body. Expected: a plain ldr into a d register from [x0].
    350 ; CHECK-LABEL: test_vld1_lane_s64:
    351 ; CHECK: ldr {{d[0-9]+}}, [x0]
    352 entry:
    353   %elt = load i64, i64* %a, align 8
    354   %vec = insertelement <1 x i64> undef, i64 %elt, i32 0
    355   ret <1 x i64> %vec
    356 }
    357 
    358 define <2 x float> @test_vld1_lane_f32(float* %a, <2 x float> %b) {
        ; 64-bit vector case: loads one float from %a and writes it into lane 1 of %b.
        ; Expected: single-element ld1 on a .s lane from [x0]; the lane number is not pinned.
    359 ; CHECK-LABEL: test_vld1_lane_f32:
    360 ; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
    361 entry:
    362   %elt = load float, float* %a, align 4
    363   %merged = insertelement <2 x float> %b, float %elt, i32 1
    364   ret <2 x float> %merged
    365 }
    366 
    367 define <1 x double> @test_vld1_lane_f64(double* %a, <1 x double> %b) {
        ; One-lane vector: the loaded double is inserted into lane 0 of undef; %b is not
        ; used by the body. Expected: a plain ldr into a d register from [x0].
    368 ; CHECK-LABEL: test_vld1_lane_f64:
    369 ; CHECK: ldr {{d[0-9]+}}, [x0]
    370 entry:
    371   %elt = load double, double* %a, align 8
    372   %vec = insertelement <1 x double> undef, double %elt, i32 0
    373   ret <1 x double> %vec
    374 }
    375 
    376 define void @test_vst1q_lane_s8(i8* %a, <16 x i8> %b) {
        ; Extracts lane 15 of %b and stores it to %a.
        ; Expected: single-element st1 on a .b lane to [x0]; the lane number is not pinned.
    377 ; CHECK-LABEL: test_vst1q_lane_s8:
    378 ; CHECK: st1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
    379 entry:
    380   %elt = extractelement <16 x i8> %b, i32 15
    381   store i8 %elt, i8* %a, align 1
    382   ret void
    383 }
    384 
    385 define void @test_vst1q_lane_s16(i16* %a, <8 x i16> %b) {
        ; Extracts lane 7 of %b and stores it to %a.
        ; Expected: single-element st1 on a .h lane to [x0]; the lane number is not pinned.
    386 ; CHECK-LABEL: test_vst1q_lane_s16:
    387 ; CHECK: st1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
    388 entry:
    389   %elt = extractelement <8 x i16> %b, i32 7
    390   store i16 %elt, i16* %a, align 2
    391   ret void
    392 }
    393 
    394 define void @test_vst1q_lane_s32(i32* %a, <4 x i32> %b) {
        ; Extracts lane 3 of %b and stores it to %a.
        ; Expected: single-element st1 on a .s lane to [x0]; the lane number is not pinned.
    395 ; CHECK-LABEL: test_vst1q_lane_s32:
    396 ; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
    397 entry:
    398   %elt = extractelement <4 x i32> %b, i32 3
    399   store i32 %elt, i32* %a, align 4
    400   ret void
    401 }
    402 
    403 define void @test_vst1q_lane_s64(i64* %a, <2 x i64> %b) {
        ; Extracts lane 1 of %b and stores it to %a.
        ; Expected: single-element st1 on a .d lane to [x0]; the lane number is not pinned.
    404 ; CHECK-LABEL: test_vst1q_lane_s64:
    405 ; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
    406 entry:
    407   %elt = extractelement <2 x i64> %b, i32 1
    408   store i64 %elt, i64* %a, align 8
    409   ret void
    410 }
    411 
    412 define void @test_vst1q_lane_f32(float* %a, <4 x float> %b) {
        ; Extracts lane 3 of %b and stores it to %a.
        ; Expected: single-element st1 on a .s lane to [x0]; the lane number is not pinned.
    413 ; CHECK-LABEL: test_vst1q_lane_f32:
    414 ; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
    415 entry:
    416   %elt = extractelement <4 x float> %b, i32 3
    417   store float %elt, float* %a, align 4
    418   ret void
    419 }
    420 
    421 define void @test_vst1q_lane_f64(double* %a, <2 x double> %b) {
        ; Extracts lane 1 of %b and stores it to %a.
        ; Expected: single-element st1 on a .d lane to [x0]; the lane number is not pinned.
    422 ; CHECK-LABEL: test_vst1q_lane_f64:
    423 ; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
    424 entry:
    425   %elt = extractelement <2 x double> %b, i32 1
    426   store double %elt, double* %a, align 8
    427   ret void
    428 }
    429 
    430 define void @test_vst1_lane_s8(i8* %a, <8 x i8> %b) {
        ; 64-bit vector case: extracts lane 7 of %b and stores it to %a.
        ; Expected: single-element st1 on a .b lane to [x0]; the lane number is not pinned.
    431 ; CHECK-LABEL: test_vst1_lane_s8:
    432 ; CHECK: st1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
    433 entry:
    434   %elt = extractelement <8 x i8> %b, i32 7
    435   store i8 %elt, i8* %a, align 1
    436   ret void
    437 }
    438 
    439 define void @test_vst1_lane_s16(i16* %a, <4 x i16> %b) {
        ; 64-bit vector case: extracts lane 3 of %b and stores it to %a.
        ; Expected: single-element st1 on a .h lane to [x0]; the lane number is not pinned.
    440 ; CHECK-LABEL: test_vst1_lane_s16:
    441 ; CHECK: st1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
    442 entry:
    443   %elt = extractelement <4 x i16> %b, i32 3
    444   store i16 %elt, i16* %a, align 2
    445   ret void
    446 }
    447 
    448 define void @test_vst1_lane_s32(i32* %a, <2 x i32> %b) {
        ; 64-bit vector case: extracts lane 1 of %b and stores it to %a.
        ; Expected: single-element st1 on a .s lane to [x0]; the lane number is not pinned.
    449 ; CHECK-LABEL: test_vst1_lane_s32:
    450 ; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
    451 entry:
    452   %elt = extractelement <2 x i32> %b, i32 1
    453   store i32 %elt, i32* %a, align 4
    454   ret void
    455 }
    456 
    457 define void @test_vst1_lane_s64(i64* %a, <1 x i64> %b) {
        ; One-lane vector: extracts the only lane of %b and stores it to %a.
        ; Expected: single-element st1 on a .d lane to [x0].
    458 ; CHECK-LABEL: test_vst1_lane_s64:
    459 ; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
    460 entry:
    461   %elt = extractelement <1 x i64> %b, i32 0
    462   store i64 %elt, i64* %a, align 8
    463   ret void
    464 }
    465 
    466 define void @test_vst1_lane_f32(float* %a, <2 x float> %b) {
        ; 64-bit vector case: extracts lane 1 of %b and stores it to %a.
        ; Expected: single-element st1 on a .s lane to [x0]; the lane number is not pinned.
    467 ; CHECK-LABEL: test_vst1_lane_f32:
    468 ; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
    469 entry:
    470   %elt = extractelement <2 x float> %b, i32 1
    471   store float %elt, float* %a, align 4
    472   ret void
    473 }
    474 
    475 define void @test_vst1_lane_f64(double* %a, <1 x double> %b) {
        ; One-lane vector: extracts the only lane of %b and stores it to %a.
        ; Expected: a plain str of a d register to [x0].
    476 ; CHECK-LABEL: test_vst1_lane_f64:
    477 ; CHECK: str {{d[0-9]+}}, [x0]
    478 entry:
    479   %elt = extractelement <1 x double> %b, i32 0
    480   store double %elt, double* %a, align 8
    481   ret void
    482 }
    483