// Clang CodeGen regression test: AArch64 NEON vget_lane/vset_lane intrinsics.
      1 // RUN: %clang_cc1 -triple arm64-apple-darwin -target-feature +neon \
      2 // RUN:   -fallow-half-arguments-and-returns -emit-llvm -o - %s \
      3 // RUN: | opt -S -mem2reg | FileCheck %s
      4 
      5 #include <arm_neon.h>
      6 
// 8-bit lanes need no bitcast: lowering is a single extractelement of lane 7.
// CHECK-LABEL: define i8 @test_vget_lane_u8(<8 x i8> %a) #0 {
// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK:   ret i8 [[VGET_LANE]]
uint8_t test_vget_lane_u8(uint8x8_t a) {
  return vget_lane_u8(a, 7);
}
     13 
// 16-bit lanes round-trip through <8 x i8> bitcasts before the lane-3 extract.
// CHECK-LABEL: define i16 @test_vget_lane_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
// CHECK:   ret i16 [[VGET_LANE]]
uint16_t test_vget_lane_u16(uint16x4_t a) {
  return vget_lane_u16(a, 3);
}
     22 
// 32-bit lanes round-trip through <8 x i8> bitcasts before the lane-1 extract.
// CHECK-LABEL: define i32 @test_vget_lane_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
// CHECK:   ret i32 [[VGET_LANE]]
uint32_t test_vget_lane_u32(uint32x2_t a) {
  return vget_lane_u32(a, 1);
}
     31 
// Signed 8-bit variant: identical IR to the u8 case (plain lane-7 extract).
// CHECK-LABEL: define i8 @test_vget_lane_s8(<8 x i8> %a) #0 {
// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK:   ret i8 [[VGET_LANE]]
int8_t test_vget_lane_s8(int8x8_t a) {
  return vget_lane_s8(a, 7);
}
     38 
// Signed 16-bit variant: same bitcast round-trip and lane-3 extract as u16.
// CHECK-LABEL: define i16 @test_vget_lane_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
// CHECK:   ret i16 [[VGET_LANE]]
int16_t test_vget_lane_s16(int16x4_t a) {
  return vget_lane_s16(a, 3);
}
     47 
// Signed 32-bit variant: same bitcast round-trip and lane-1 extract as u32.
// CHECK-LABEL: define i32 @test_vget_lane_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
// CHECK:   ret i32 [[VGET_LANE]]
int32_t test_vget_lane_s32(int32x2_t a) {
  return vget_lane_s32(a, 1);
}
     56 
// Polynomial 8-bit variant: identical IR to the u8/s8 cases.
// CHECK-LABEL: define i8 @test_vget_lane_p8(<8 x i8> %a) #0 {
// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK:   ret i8 [[VGET_LANE]]
poly8_t test_vget_lane_p8(poly8x8_t a) {
  return vget_lane_p8(a, 7);
}
     63 
// Polynomial 16-bit variant: identical IR to the u16/s16 cases.
// CHECK-LABEL: define i16 @test_vget_lane_p16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
// CHECK:   ret i16 [[VGET_LANE]]
poly16_t test_vget_lane_p16(poly16x4_t a) {
  return vget_lane_p16(a, 3);
}
     72 
// f32: bitcast round-trip through <8 x i8>, then extract lane 1 as float.
// CHECK-LABEL: define float @test_vget_lane_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
// CHECK:   ret float [[VGET_LANE]]
float32_t test_vget_lane_f32(float32x2_t a) {
  return vget_lane_f32(a, 1);
}
     81 
// f16: the intrinsic reinterprets through i16 via stack temporaries (the
// __reint allocas), extracts lane 1 as i16, reloads it as half, and the
// half result is widened to float at the return.
// CHECK-LABEL: define float @test_vget_lane_f16(<4 x half> %a) #0 {
// CHECK:   [[__REINT_242:%.*]] = alloca <4 x half>, align 8
// CHECK:   [[__REINT1_242:%.*]] = alloca i16, align 2
// CHECK:   store <4 x half> %a, <4 x half>* [[__REINT_242]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_242]] to <4 x i16>*
// CHECK:   [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP3]], i32 1
// CHECK:   store i16 [[VGET_LANE]], i16* [[__REINT1_242]], align 2
// CHECK:   [[TMP4:%.*]] = bitcast i16* [[__REINT1_242]] to half*
// CHECK:   [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
// CHECK:   [[CONV:%.*]] = fpext half [[TMP5]] to float
// CHECK:   ret float [[CONV]]
float32_t test_vget_lane_f16(float16x4_t a) {
  return vget_lane_f16(a, 1);
}
     99 
// 128-bit (q) u8 form: plain extractelement of the top lane (15).
// CHECK-LABEL: define i8 @test_vgetq_lane_u8(<16 x i8> %a) #0 {
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK:   ret i8 [[VGETQ_LANE]]
uint8_t test_vgetq_lane_u8(uint8x16_t a) {
  return vgetq_lane_u8(a, 15);
}
    106 
// 128-bit u16 form: bitcast round-trip through <16 x i8>, lane-7 extract.
// CHECK-LABEL: define i16 @test_vgetq_lane_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK:   ret i16 [[VGETQ_LANE]]
uint16_t test_vgetq_lane_u16(uint16x8_t a) {
  return vgetq_lane_u16(a, 7);
}
    115 
// 128-bit u32 form: bitcast round-trip through <16 x i8>, lane-3 extract.
// CHECK-LABEL: define i32 @test_vgetq_lane_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
// CHECK:   ret i32 [[VGETQ_LANE]]
uint32_t test_vgetq_lane_u32(uint32x4_t a) {
  return vgetq_lane_u32(a, 3);
}
    124 
// Signed 128-bit 8-bit variant: identical IR to the u8 q-form.
// CHECK-LABEL: define i8 @test_vgetq_lane_s8(<16 x i8> %a) #0 {
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK:   ret i8 [[VGETQ_LANE]]
int8_t test_vgetq_lane_s8(int8x16_t a) {
  return vgetq_lane_s8(a, 15);
}
    131 
// Signed 128-bit 16-bit variant: identical IR to the u16 q-form.
// CHECK-LABEL: define i16 @test_vgetq_lane_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK:   ret i16 [[VGETQ_LANE]]
int16_t test_vgetq_lane_s16(int16x8_t a) {
  return vgetq_lane_s16(a, 7);
}
    140 
// Signed 128-bit 32-bit variant: identical IR to the u32 q-form.
// CHECK-LABEL: define i32 @test_vgetq_lane_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
// CHECK:   ret i32 [[VGETQ_LANE]]
int32_t test_vgetq_lane_s32(int32x4_t a) {
  return vgetq_lane_s32(a, 3);
}
    149 
// Polynomial 128-bit 8-bit variant: identical IR to the u8/s8 q-forms.
// CHECK-LABEL: define i8 @test_vgetq_lane_p8(<16 x i8> %a) #0 {
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK:   ret i8 [[VGETQ_LANE]]
poly8_t test_vgetq_lane_p8(poly8x16_t a) {
  return vgetq_lane_p8(a, 15);
}
    156 
// Polynomial 128-bit 16-bit variant: identical IR to the u16/s16 q-forms.
// CHECK-LABEL: define i16 @test_vgetq_lane_p16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK:   ret i16 [[VGETQ_LANE]]
poly16_t test_vgetq_lane_p16(poly16x8_t a) {
  return vgetq_lane_p16(a, 7);
}
    165 
// 128-bit f32 form: bitcast round-trip through <16 x i8>, lane-3 extract.
// CHECK-LABEL: define float @test_vgetq_lane_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
// CHECK:   ret float [[VGETQ_LANE]]
float32_t test_vgetq_lane_f32(float32x4_t a) {
  return vgetq_lane_f32(a, 3);
}
    174 
// 128-bit f16 form: same stack reinterpretation dance as the 64-bit f16
// case (extract lane 3 as i16, reload as half, fpext to float on return).
// CHECK-LABEL: define float @test_vgetq_lane_f16(<8 x half> %a) #0 {
// CHECK:   [[__REINT_244:%.*]] = alloca <8 x half>, align 16
// CHECK:   [[__REINT1_244:%.*]] = alloca i16, align 2
// CHECK:   store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_244]] to <8 x i16>*
// CHECK:   [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3
// CHECK:   store i16 [[VGETQ_LANE]], i16* [[__REINT1_244]], align 2
// CHECK:   [[TMP4:%.*]] = bitcast i16* [[__REINT1_244]] to half*
// CHECK:   [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
// CHECK:   [[CONV:%.*]] = fpext half [[TMP5]] to float
// CHECK:   ret float [[CONV]]
float32_t test_vgetq_lane_f16(float16x8_t a) {
  return vgetq_lane_f16(a, 3);
}
    192 
// Single-element 64-bit vector: only lane 0 exists; extract after bitcast.
// CHECK-LABEL: define i64 @test_vget_lane_s64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK:   ret i64 [[VGET_LANE]]
int64_t test_vget_lane_s64(int64x1_t a) {
  return vget_lane_s64(a, 0);
}
    201 
// Unsigned single-element 64-bit variant: identical IR to the s64 case.
// CHECK-LABEL: define i64 @test_vget_lane_u64(<1 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK:   ret i64 [[VGET_LANE]]
uint64_t test_vget_lane_u64(uint64x1_t a) {
  return vget_lane_u64(a, 0);
}
    210 
// 128-bit s64 form: bitcast round-trip through <16 x i8>, lane-1 extract.
// CHECK-LABEL: define i64 @test_vgetq_lane_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK:   ret i64 [[VGETQ_LANE]]
int64_t test_vgetq_lane_s64(int64x2_t a) {
  return vgetq_lane_s64(a, 1);
}
    219 
// Unsigned 128-bit 64-bit variant: identical IR to the s64 q-form.
// CHECK-LABEL: define i64 @test_vgetq_lane_u64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK:   ret i64 [[VGETQ_LANE]]
uint64_t test_vgetq_lane_u64(uint64x2_t a) {
  return vgetq_lane_u64(a, 1);
}
    228 
    229 
// vset_lane mirror of the vget tests: u8 lowers to a plain insertelement.
// CHECK-LABEL: define <8 x i8> @test_vset_lane_u8(i8 %a, <8 x i8> %b) #0 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK:   ret <8 x i8> [[VSET_LANE]]
uint8x8_t test_vset_lane_u8(uint8_t a, uint8x8_t b) {
  return vset_lane_u8(a, b, 7);
}
    236 
// u16 insert: bitcast round-trip through <8 x i8> before inserting lane 3.
// CHECK-LABEL: define <4 x i16> @test_vset_lane_u16(i16 %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
// CHECK:   ret <4 x i16> [[VSET_LANE]]
uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) {
  return vset_lane_u16(a, b, 3);
}
    245 
// u32 insert: bitcast round-trip through <8 x i8> before inserting lane 1.
// CHECK-LABEL: define <2 x i32> @test_vset_lane_u32(i32 %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a, i32 1
// CHECK:   ret <2 x i32> [[VSET_LANE]]
uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) {
  return vset_lane_u32(a, b, 1);
}
    254 
// Signed 8-bit insert: identical IR to the u8 case.
// CHECK-LABEL: define <8 x i8> @test_vset_lane_s8(i8 %a, <8 x i8> %b) #0 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK:   ret <8 x i8> [[VSET_LANE]]
int8x8_t test_vset_lane_s8(int8_t a, int8x8_t b) {
  return vset_lane_s8(a, b, 7);
}
    261 
// Signed 16-bit insert: identical IR to the u16 case.
// CHECK-LABEL: define <4 x i16> @test_vset_lane_s16(i16 %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
// CHECK:   ret <4 x i16> [[VSET_LANE]]
int16x4_t test_vset_lane_s16(int16_t a, int16x4_t b) {
  return vset_lane_s16(a, b, 3);
}
    270 
// Signed 32-bit insert: identical IR to the u32 case.
// CHECK-LABEL: define <2 x i32> @test_vset_lane_s32(i32 %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a, i32 1
// CHECK:   ret <2 x i32> [[VSET_LANE]]
int32x2_t test_vset_lane_s32(int32_t a, int32x2_t b) {
  return vset_lane_s32(a, b, 1);
}
    279 
// Polynomial 8-bit insert: identical IR to the u8/s8 cases.
// CHECK-LABEL: define <8 x i8> @test_vset_lane_p8(i8 %a, <8 x i8> %b) #0 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK:   ret <8 x i8> [[VSET_LANE]]
poly8x8_t test_vset_lane_p8(poly8_t a, poly8x8_t b) {
  return vset_lane_p8(a, b, 7);
}
    286 
// Polynomial 16-bit insert: identical IR to the u16/s16 cases.
// CHECK-LABEL: define <4 x i16> @test_vset_lane_p16(i16 %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
// CHECK:   ret <4 x i16> [[VSET_LANE]]
poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t b) {
  return vset_lane_p16(a, b, 3);
}
    295 
// f32 insert: bitcast round-trip through <8 x i8> before inserting lane 1.
// CHECK-LABEL: define <2 x float> @test_vset_lane_f32(float %a, <2 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x float> [[TMP1]], float %a, i32 1
// CHECK:   ret <2 x float> [[VSET_LANE]]
float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) {
  return vset_lane_f32(a, b, 1);
}
    304 
// f16 insert: the scalar half is passed by pointer (half-by-value arguments
// aren't used here); both the scalar and the vector are reinterpreted as i16
// data through stack temporaries, lane 3 is inserted as i16, and the result
// is reloaded as a <4 x half> vector.
// CHECK-LABEL: define <4 x half> @test_vset_lane_f16(half* %a, <4 x half> %b) #0 {
// CHECK:   [[__REINT_246:%.*]] = alloca half, align 2
// CHECK:   [[__REINT1_246:%.*]] = alloca <4 x half>, align 8
// CHECK:   [[__REINT2_246:%.*]] = alloca <4 x i16>, align 8
// CHECK:   [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK:   store half [[TMP0]], half* [[__REINT_246]], align 2
// CHECK:   store <4 x half> %b, <4 x half>* [[__REINT1_246]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast half* [[__REINT_246]] to i16*
// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK:   [[TMP3:%.*]] = bitcast <4 x half>* [[__REINT1_246]] to <4 x i16>*
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP6]], i16 [[TMP2]], i32 3
// CHECK:   store <4 x i16> [[VSET_LANE]], <4 x i16>* [[__REINT2_246]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16>* [[__REINT2_246]] to <4 x half>*
// CHECK:   [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[TMP7]], align 8
// CHECK:   ret <4 x half> [[TMP8]]
float16x4_t test_vset_lane_f16(float16_t *a, float16x4_t b) {
  return vset_lane_f16(*a, b, 3);
}
    326 
// 128-bit u8 insert: plain insertelement into the top lane (15).
// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_u8(i8 %a, <16 x i8> %b) #0 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK:   ret <16 x i8> [[VSET_LANE]]
uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) {
  return vsetq_lane_u8(a, b, 15);
}
    333 
// 128-bit u16 insert: bitcast round-trip through <16 x i8>, lane-7 insert.
// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_u16(i16 %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
// CHECK:   ret <8 x i16> [[VSET_LANE]]
uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) {
  return vsetq_lane_u16(a, b, 7);
}
    342 
// 128-bit u32 insert: bitcast round-trip through <16 x i8>, lane-3 insert.
// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_u32(i32 %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
// CHECK:   ret <4 x i32> [[VSET_LANE]]
uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) {
  return vsetq_lane_u32(a, b, 3);
}
    351 
// Signed 128-bit 8-bit insert: identical IR to the u8 q-form.
// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_s8(i8 %a, <16 x i8> %b) #0 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK:   ret <16 x i8> [[VSET_LANE]]
int8x16_t test_vsetq_lane_s8(int8_t a, int8x16_t b) {
  return vsetq_lane_s8(a, b, 15);
}
    358 
// Signed 128-bit 16-bit insert: identical IR to the u16 q-form.
// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_s16(i16 %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
// CHECK:   ret <8 x i16> [[VSET_LANE]]
int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b) {
  return vsetq_lane_s16(a, b, 7);
}
    367 
// Signed 128-bit 32-bit insert: identical IR to the u32 q-form.
// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_s32(i32 %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
// CHECK:   ret <4 x i32> [[VSET_LANE]]
int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) {
  return vsetq_lane_s32(a, b, 3);
}
    376 
// Polynomial 128-bit 8-bit insert: identical IR to the u8/s8 q-forms.
// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_p8(i8 %a, <16 x i8> %b) #0 {
// CHECK:   [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK:   ret <16 x i8> [[VSET_LANE]]
poly8x16_t test_vsetq_lane_p8(poly8_t a, poly8x16_t b) {
  return vsetq_lane_p8(a, b, 15);
}
    383 
// Polynomial 128-bit 16-bit insert: identical IR to the u16/s16 q-forms.
// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_p16(i16 %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
// CHECK:   ret <8 x i16> [[VSET_LANE]]
poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t b) {
  return vsetq_lane_p16(a, b, 7);
}
    392 
// 128-bit f32 insert: bitcast round-trip through <16 x i8>, lane-3 insert.
// CHECK-LABEL: define <4 x float> @test_vsetq_lane_f32(float %a, <4 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x float> [[TMP1]], float %a, i32 3
// CHECK:   ret <4 x float> [[VSET_LANE]]
float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) {
  return vsetq_lane_f32(a, b, 3);
}
    401 
// 128-bit f16 insert: same pointer-passed scalar and stack reinterpretation
// pattern as the 64-bit f16 case, inserting lane 7 as i16 and reloading the
// result as <8 x half>.
// CHECK-LABEL: define <8 x half> @test_vsetq_lane_f16(half* %a, <8 x half> %b) #0 {
// CHECK:   [[__REINT_248:%.*]] = alloca half, align 2
// CHECK:   [[__REINT1_248:%.*]] = alloca <8 x half>, align 16
// CHECK:   [[__REINT2_248:%.*]] = alloca <8 x i16>, align 16
// CHECK:   [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK:   store half [[TMP0]], half* [[__REINT_248]], align 2
// CHECK:   store <8 x half> %b, <8 x half>* [[__REINT1_248]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast half* [[__REINT_248]] to i16*
// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK:   [[TMP3:%.*]] = bitcast <8 x half>* [[__REINT1_248]] to <8 x i16>*
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[TMP6:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[TMP2]], i32 7
// CHECK:   store <8 x i16> [[VSET_LANE]], <8 x i16>* [[__REINT2_248]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16>* [[__REINT2_248]] to <8 x half>*
// CHECK:   [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[TMP7]], align 16
// CHECK:   ret <8 x half> [[TMP8]]
float16x8_t test_vsetq_lane_f16(float16_t *a, float16x8_t b) {
  return vsetq_lane_f16(*a, b, 7);
}
    423 
// Single-element s64 insert: only lane 0 exists; insert after bitcast.
// CHECK-LABEL: define <1 x i64> @test_vset_lane_s64(i64 %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0
// CHECK:   ret <1 x i64> [[VSET_LANE]]
int64x1_t test_vset_lane_s64(int64_t a, int64x1_t b) {
  return vset_lane_s64(a, b, 0);
}
    432 
// Unsigned single-element 64-bit insert: identical IR to the s64 case.
// CHECK-LABEL: define <1 x i64> @test_vset_lane_u64(i64 %a, <1 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0
// CHECK:   ret <1 x i64> [[VSET_LANE]]
uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) {
  return vset_lane_u64(a, b, 0);
}
    441 
// 128-bit s64 insert: bitcast round-trip through <16 x i8>, lane-1 insert.
// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_s64(i64 %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
// CHECK:   ret <2 x i64> [[VSET_LANE]]
int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) {
  return vsetq_lane_s64(a, b, 1);
}
    450 
// Unsigned 128-bit 64-bit insert: identical IR to the s64 q-form.
// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_u64(i64 %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
// CHECK:   ret <2 x i64> [[VSET_LANE]]
uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) {
  return vsetq_lane_u64(a, b, 1);
}
    459