// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi apcs-gnu\
// RUN: -target-cpu swift -fallow-half-arguments-and-returns -ffreestanding -emit-llvm -o - %s \
// RUN: | opt -S -mem2reg | FileCheck %s

// REQUIRES: long-tests

#include <arm_neon.h>

// vaba: absolute difference and accumulate, a + |b - c| per lane; the
// widening vabal variants follow further down.

// CHECK-LABEL: define <8 x i8> @test_vaba_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[VABD_V_I_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vaba_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
  return vaba_s8(a, b, c);
}

// CHECK-LABEL: define <4 x i16> @test_vaba_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> [[VABD_V_I_I]], <4 x i16> [[VABD_V1_I_I]]) #4
// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <4 x i16>
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[TMP2]]
// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vaba_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vaba_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vaba_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> [[VABD_V_I_I]], <2 x i32> [[VABD_V1_I_I]]) #4
// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <2 x i32>
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[TMP2]]
// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vaba_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vaba_s32(a, b, c);
}

// CHECK-LABEL: define <8 x i8> @test_vaba_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[VABD_V_I_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
uint8x8_t test_vaba_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vaba_u8(a, b, c);
}

// CHECK-LABEL: define <4 x i16> @test_vaba_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> [[VABD_V_I_I]], <4 x i16> [[VABD_V1_I_I]]) #4
// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <4 x i16>
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[TMP2]]
// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vaba_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vaba_u16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vaba_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> [[VABD_V_I_I]], <2 x i32> [[VABD_V1_I_I]]) #4
// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <2 x i32>
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[TMP2]]
// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vaba_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vaba_u32(a, b, c);
}

// CHECK-LABEL: define <16 x i8> @test_vabaq_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[VABDQ_V_I_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %b, <16 x i8> %c) #4
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[VABDQ_V_I_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
int8x16_t test_vabaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
  return vabaq_s8(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vabaq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK: [[VABDQ_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VABDQ_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VABDQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> [[VABDQ_V_I_I]], <8 x i16> [[VABDQ_V1_I_I]]) #4
// CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP2]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vabaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
  return vabaq_s16(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vabaq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %c to <16 x i8>
// CHECK: [[VABDQ_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VABDQ_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VABDQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> [[VABDQ_V_I_I]], <4 x i32> [[VABDQ_V1_I_I]]) #4
// CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I_I]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP2]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vabaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
  return vabaq_s32(a, b, c);
}

// CHECK-LABEL: define <16 x i8> @test_vabaq_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[VABDQ_V_I_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %b, <16 x i8> %c) #4
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[VABDQ_V_I_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
uint8x16_t test_vabaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  return vabaq_u8(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vabaq_u16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK: [[VABDQ_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VABDQ_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VABDQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> [[VABDQ_V_I_I]], <8 x i16> [[VABDQ_V1_I_I]]) #4
// CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP2]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
  return vabaq_u16(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vabaq_u32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %c to <16 x i8>
// CHECK: [[VABDQ_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VABDQ_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VABDQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> [[VABDQ_V_I_I]], <4 x i32> [[VABDQ_V1_I_I]]) #4
// CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I_I]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP2]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
  return vabaq_u32(a, b, c);
}

// vabal: widening absolute difference and accumulate. The |b - c| result is
// non-negative, so it is zero-extended before the add even in the signed
// forms, as the zext checks below show.

// CHECK-LABEL: define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VABD_V_I_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vabal_s8(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VABD_V_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD_V1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD_V2_I_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> [[VABD_V_I_I_I]], <4 x i16> [[VABD_V1_I_I_I]]) #4
// CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I_I]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vabal_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VABD_V_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD_V1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD_V2_I_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> [[VABD_V_I_I_I]], <2 x i32> [[VABD_V1_I_I_I]]) #4
// CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I_I]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vabal_s32(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VABD_V_I_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vabal_u8(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VABD_V_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD_V1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD_V2_I_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> [[VABD_V_I_I_I]], <4 x i16> [[VABD_V1_I_I_I]]) #4
// CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I_I]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vabal_u16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VABD_V_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD_V1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD_V2_I_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> [[VABD_V_I_I_I]], <2 x i32> [[VABD_V1_I_I_I]]) #4
// CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I_I]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vabal_u32(a, b, c);
}
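// vabd/vabdq compute the lane-wise absolute difference (for one unsigned
// lane: x > y ? x - y : y - x). The signed and float forms lower to
// llvm.arm.neon.vabds and the unsigned forms to llvm.arm.neon.vabdu, as the
// checks below show.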
// CHECK-LABEL: define <8 x i8> @test_vabd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VABD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VABD_V_I]]
int8x8_t test_vabd_s8(int8x8_t a, int8x8_t b) {
  return vabd_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vabd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VABD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> [[VABD_V_I]], <4 x i16> [[VABD_V1_I]]) #4
// CHECK: [[VABD_V3_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vabd_s16(int16x4_t a, int16x4_t b) {
  return vabd_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vabd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VABD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> [[VABD_V_I]], <2 x i32> [[VABD_V1_I]]) #4
// CHECK: [[VABD_V3_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vabd_s32(int32x2_t a, int32x2_t b) {
  return vabd_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vabd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VABD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VABD_V_I]]
uint8x8_t test_vabd_u8(uint8x8_t a, uint8x8_t b) {
  return vabd_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vabd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VABD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> [[VABD_V_I]], <4 x i16> [[VABD_V1_I]]) #4
// CHECK: [[VABD_V3_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vabd_u16(uint16x4_t a, uint16x4_t b) {
  return vabd_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vabd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VABD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> [[VABD_V_I]], <2 x i32> [[VABD_V1_I]]) #4
// CHECK: [[VABD_V3_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vabd_u32(uint32x2_t a, uint32x2_t b) {
  return vabd_u32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vabd_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VABD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VABD_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> [[VABD_V_I]], <2 x float> [[VABD_V1_I]]) #4
// CHECK: [[VABD_V3_I:%.*]] = bitcast <2 x float> [[VABD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <2 x float>
// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vabd_f32(float32x2_t a, float32x2_t b) {
  return vabd_f32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vabdq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VABDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VABDQ_V_I]]
int8x16_t test_vabdq_s8(int8x16_t a, int8x16_t b) {
  return vabdq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vabdq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> [[VABDQ_V_I]], <8 x i16> [[VABDQ_V1_I]]) #4
// CHECK: [[VABDQ_V3_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vabdq_s16(int16x8_t a, int16x8_t b) {
  return vabdq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vabdq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> [[VABDQ_V_I]], <4 x i32> [[VABDQ_V1_I]]) #4
// CHECK: [[VABDQ_V3_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vabdq_s32(int32x4_t a, int32x4_t b) {
  return vabdq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vabdq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VABDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VABDQ_V_I]]
uint8x16_t test_vabdq_u8(uint8x16_t a, uint8x16_t b) {
  return vabdq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vabdq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> [[VABDQ_V_I]], <8 x i16> [[VABDQ_V1_I]]) #4
// CHECK: [[VABDQ_V3_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vabdq_u16(uint16x8_t a, uint16x8_t b) {
  return vabdq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vabdq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> [[VABDQ_V_I]], <4 x i32> [[VABDQ_V1_I]]) #4
// CHECK: [[VABDQ_V3_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vabdq_u32(uint32x4_t a, uint32x4_t b) {
  return vabdq_u32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vabdq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VABDQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> [[VABDQ_V_I]], <4 x float> [[VABDQ_V1_I]]) #4
// CHECK: [[VABDQ_V3_I:%.*]] = bitcast <4 x float> [[VABDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <4 x float>
// CHECK: ret <4 x float> [[TMP2]]
float32x4_t test_vabdq_f32(float32x4_t a, float32x4_t b) {
  return vabdq_f32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I_I]]
int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) {
  return vabdl_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> [[VABD_V_I_I]], <4 x i16> [[VABD_V1_I_I]]) #4
// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I_I]]
int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) {
  return vabdl_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> [[VABD_V_I_I]], <2 x i32> [[VABD_V1_I_I]]) #4
// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I_I]]
int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) {
  return vabdl_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I_I]]
uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) {
  return vabdl_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> [[VABD_V_I_I]], <4 x i16> [[VABD_V1_I_I]]) #4
// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I_I]]
uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) {
  return vabdl_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> [[VABD_V_I_I]], <2 x i32> [[VABD_V1_I_I]]) #4
// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I_I]]
uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) {
  return vabdl_u32(a, b);
}
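// vabs/vabsq take the lane-wise absolute value of a single operand. The
// integer forms lower to llvm.arm.neon.vabs; the float forms lower to the
// generic llvm.fabs intrinsic instead, as the checks below show.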
// CHECK-LABEL: define <8 x i8> @test_vabs_s8(<8 x i8> %a) #0 {
// CHECK: [[VABS_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %a) #4
// CHECK: ret <8 x i8> [[VABS_I]]
int8x8_t test_vabs_s8(int8x8_t a) {
  return vabs_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vabs_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VABS1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> [[VABS_I]]) #4
// CHECK: ret <4 x i16> [[VABS1_I]]
int16x4_t test_vabs_s16(int16x4_t a) {
  return vabs_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vabs_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VABS1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> [[VABS_I]]) #4
// CHECK: ret <2 x i32> [[VABS1_I]]
int32x2_t test_vabs_s32(int32x2_t a) {
  return vabs_s32(a);
}

// CHECK-LABEL: define <2 x float> @test_vabs_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VABS1_I:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[VABS_I]]) #4
// CHECK: ret <2 x float> [[VABS1_I]]
float32x2_t test_vabs_f32(float32x2_t a) {
  return vabs_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vabsq_s8(<16 x i8> %a) #0 {
// CHECK: [[VABS_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %a) #4
// CHECK: ret <16 x i8> [[VABS_I]]
int8x16_t test_vabsq_s8(int8x16_t a) {
  return vabsq_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vabsq_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VABS1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> [[VABS_I]]) #4
// CHECK: ret <8 x i16> [[VABS1_I]]
int16x8_t test_vabsq_s16(int16x8_t a) {
  return vabsq_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vabsq_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VABS1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> [[VABS_I]]) #4
// CHECK: ret <4 x i32> [[VABS1_I]]
int32x4_t test_vabsq_s32(int32x4_t a) {
  return vabsq_s32(a);
}

// CHECK-LABEL: define <4 x float> @test_vabsq_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VABS1_I:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[VABS_I]]) #4
// CHECK: ret <4 x float> [[VABS1_I]]
float32x4_t test_vabsq_f32(float32x4_t a) {
  return vabsq_f32(a);
}
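// vadd/vaddq need no target intrinsic at all: the integer variants lower to
// a plain IR add and the float variants to fadd.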
// CHECK-LABEL: define <8 x i8> @test_vadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vadd_s8(int8x8_t a, int8x8_t b) {
  return vadd_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vadd_s16(int16x4_t a, int16x4_t b) {
  return vadd_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vadd_s32(int32x2_t a, int32x2_t b) {
  return vadd_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vadd_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <1 x i64> %a, %b
// CHECK: ret <1 x i64> [[ADD_I]]
int64x1_t test_vadd_s64(int64x1_t a, int64x1_t b) {
  return vadd_s64(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vadd_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %a, %b
// CHECK: ret <2 x float> [[ADD_I]]
float32x2_t test_vadd_f32(float32x2_t a, float32x2_t b) {
  return vadd_f32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[ADD_I]]
uint8x8_t test_vadd_u8(uint8x8_t a, uint8x8_t b) {
  return vadd_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vadd_u16(uint16x4_t a, uint16x4_t b) {
  return vadd_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vadd_u32(uint32x2_t a, uint32x2_t b) {
  return vadd_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vadd_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <1 x i64> %a, %b
// CHECK: ret <1 x i64> [[ADD_I]]
uint64x1_t test_vadd_u64(uint64x1_t a, uint64x1_t b) {
  return vadd_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[ADD_I]]
int8x16_t test_vaddq_s8(int8x16_t a, int8x16_t b) {
  return vaddq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddq_s16(int16x8_t a, int16x8_t b) {
  return vaddq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddq_s32(int32x4_t a, int32x4_t b) {
  return vaddq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddq_s64(int64x2_t a, int64x2_t b) {
  return vaddq_s64(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vaddq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, %b
// CHECK: ret <4 x float> [[ADD_I]]
float32x4_t test_vaddq_f32(float32x4_t a, float32x4_t b) {
  return vaddq_f32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[ADD_I]]
uint8x16_t test_vaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vaddq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vaddq_u16(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vaddq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vaddq_u64(a, b);
}

// vaddhn: add and take the high half of each lane, lowered as a full-width
// add, a logical shift right by half the lane width, and a trunc.

// CHECK-LABEL: define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[VADDHN2_I]]
int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
  return vaddhn_s16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[VADDHN2_I]]
int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) {
  return vaddhn_s32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
// CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[VADDHN2_I]]
int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) {
  return vaddhn_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[VADDHN2_I]]
uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) {
  return vaddhn_u16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[VADDHN2_I]]
uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vaddhn_u32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
// CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[VADDHN2_I]]
uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vaddhn_u64(a, b);
}

// vaddl: widening add; both operands are sign- or zero-extended to the
// double-width type before a full-width add.

// CHECK-LABEL: define <8 x i16> @test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) {
  return vaddl_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) {
  return vaddl_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) {
  return vaddl_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) {
  return vaddl_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) {
  return vaddl_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) {
  return vaddl_u32(a, b);
}

// vaddw: wide add; only the narrow second operand is extended before the
// add.

// CHECK-LABEL: define <8 x i16> @test_vaddw_s8(<8 x i16> %a, <8 x i8> %b) #0 {
// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) {
  return vaddw_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddw_s16(<4 x i32> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) {
  return vaddw_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddw_s32(<2 x i64> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) {
  return vaddw_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vaddw_u8(<8 x i16> %a, <8 x i8> %b) #0 {
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) {
  return vaddw_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vaddw_u16(<4 x i32> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) {
  return vaddw_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vaddw_u32(<2 x i64> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) {
  return vaddw_u32(a, b);
}
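// vand/vandq are lane-wise bitwise AND; like the other pure bitwise ops,
// every element width lowers to the same plain IR instruction.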
// CHECK-LABEL: define <8 x i8> @test_vand_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[AND_I]]
int8x8_t test_vand_s8(int8x8_t a, int8x8_t b) {
  return vand_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vand_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[AND_I]]
int16x4_t test_vand_s16(int16x4_t a, int16x4_t b) {
  return vand_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vand_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[AND_I]]
int32x2_t test_vand_s32(int32x2_t a, int32x2_t b) {
  return vand_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vand_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <1 x i64> %a, %b
// CHECK: ret <1 x i64> [[AND_I]]
int64x1_t test_vand_s64(int64x1_t a, int64x1_t b) {
  return vand_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vand_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[AND_I]]
uint8x8_t test_vand_u8(uint8x8_t a, uint8x8_t b) {
  return vand_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vand_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[AND_I]]
uint16x4_t test_vand_u16(uint16x4_t a, uint16x4_t b) {
  return vand_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vand_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[AND_I]]
uint32x2_t test_vand_u32(uint32x2_t a, uint32x2_t b) {
  return vand_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vand_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <1 x i64> %a, %b
// CHECK: ret <1 x i64> [[AND_I]]
uint64x1_t test_vand_u64(uint64x1_t a, uint64x1_t b) {
  return vand_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vandq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[AND_I]]
int8x16_t test_vandq_s8(int8x16_t a, int8x16_t b) {
  return vandq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vandq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[AND_I]]
int16x8_t test_vandq_s16(int16x8_t a, int16x8_t b) {
  return vandq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vandq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[AND_I]]
int32x4_t test_vandq_s32(int32x4_t a, int32x4_t b) {
  return vandq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vandq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[AND_I]]
int64x2_t test_vandq_s64(int64x2_t a, int64x2_t b) {
  return vandq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vandq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[AND_I]]
uint8x16_t test_vandq_u8(uint8x16_t a, uint8x16_t b) {
  return vandq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vandq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[AND_I]]
uint16x8_t test_vandq_u16(uint16x8_t a, uint16x8_t b) {
  return vandq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vandq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[AND_I]]
uint32x4_t test_vandq_u32(uint32x4_t a, uint32x4_t b) {
  return vandq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vandq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[AND_I]]
uint64x2_t test_vandq_u64(uint64x2_t a, uint64x2_t b) {
  return vandq_u64(a, b);
}

// vbic/vbicq compute a & ~b ("bit clear"), emitted as an xor with all-ones
// followed by an and.

// CHECK-LABEL: define <8 x i8> @test_vbic_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]]
// CHECK: ret <8 x i8> [[AND_I]]
int8x8_t test_vbic_s8(int8x8_t a, int8x8_t b) {
  return vbic_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vbic_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]]
// CHECK: ret <4 x i16> [[AND_I]]
int16x4_t test_vbic_s16(int16x4_t a, int16x4_t b) {
  return vbic_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vbic_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
// CHECK: [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]]
// CHECK: ret <2 x i32> [[AND_I]]
int32x2_t test_vbic_s32(int32x2_t a, int32x2_t b) {
  return vbic_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vbic_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
// CHECK: [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]]
// CHECK: ret <1 x i64> [[AND_I]]
int64x1_t test_vbic_s64(int64x1_t a, int64x1_t b) {
  return vbic_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vbic_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]]
// CHECK: ret <8 x i8> [[AND_I]]
uint8x8_t test_vbic_u8(uint8x8_t a, uint8x8_t b) {
  return vbic_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vbic_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]]
// CHECK: ret <4 x i16> [[AND_I]]
uint16x4_t test_vbic_u16(uint16x4_t a, uint16x4_t b) {
  return vbic_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vbic_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
// CHECK: [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]]
// CHECK: ret <2 x i32> [[AND_I]]
uint32x2_t test_vbic_u32(uint32x2_t a, uint32x2_t b) {
  return vbic_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vbic_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
// CHECK: [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]]
// CHECK: ret <1 x i64> [[AND_I]]
uint64x1_t test_vbic_u64(uint64x1_t a, uint64x1_t b) {
  return vbic_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vbicq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]]
// CHECK: ret <16 x i8> [[AND_I]]
int8x16_t test_vbicq_s8(int8x16_t a, int8x16_t b) {
  return vbicq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vbicq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]]
// CHECK: ret <8 x i16> [[AND_I]]
int16x8_t test_vbicq_s16(int16x8_t a, int16x8_t b) {
  return vbicq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vbicq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]]
// CHECK: ret <4 x i32> [[AND_I]]
int32x4_t test_vbicq_s32(int32x4_t a, int32x4_t b) {
  return vbicq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vbicq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]]
// CHECK: ret <2 x i64> [[AND_I]]
int64x2_t test_vbicq_s64(int64x2_t a, int64x2_t b) {
  return vbicq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vbicq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]]
// CHECK: ret <16 x i8> [[AND_I]]
uint8x16_t test_vbicq_u8(uint8x16_t a, uint8x16_t b) {
  return vbicq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vbicq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]]
// CHECK: ret <8 x i16> [[AND_I]]
uint16x8_t test_vbicq_u16(uint16x8_t a, uint16x8_t b) {
  return vbicq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vbicq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]]
// CHECK: ret <4 x i32> [[AND_I]]
uint32x4_t test_vbicq_u32(uint32x4_t a, uint32x4_t b) {
  return vbicq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vbicq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]]
// CHECK: ret <2 x i64> [[AND_I]]
uint64x2_t test_vbicq_u64(uint64x2_t a, uint64x2_t b) {
  return vbicq_u64(a, b);
}
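// vbsl/vbslq (bitwise select) compute (a & b) | (~a & c): bits are taken
// from %b where the mask %a is set and from %c where it is clear. Every
// element width funnels through the byte-wide llvm.arm.neon.vbsl intrinsic.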
// CHECK-LABEL: define <8 x i8> @test_vbsl_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4
// CHECK: ret <8 x i8> [[VBSL_V_I]]
int8x8_t test_vbsl_s8(uint8x8_t a, int8x8_t b, int8x8_t c) {
  return vbsl_s8(a, b, c);
}

// CHECK-LABEL: define <4 x i16> @test_vbsl_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP3]]
int16x4_t test_vbsl_s16(uint16x4_t a, int16x4_t b, int16x4_t c) {
  return vbsl_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vbsl_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP3]]
int32x2_t test_vbsl_s32(uint32x2_t a, int32x2_t b, int32x2_t c) {
  return vbsl_s32(a, b, c);
}

// CHECK-LABEL: define <1 x i64> @test_vbsl_s64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP3]]
int64x1_t test_vbsl_s64(uint64x1_t a, int64x1_t b, int64x1_t c) {
  return vbsl_s64(a, b, c);
}

// CHECK-LABEL: define <8 x i8> @test_vbsl_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4
// CHECK: ret <8 x i8> [[VBSL_V_I]]
uint8x8_t test_vbsl_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vbsl_u8(a, b, c);
}

// CHECK-LABEL: define <4 x i16> @test_vbsl_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP3]]
uint16x4_t test_vbsl_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vbsl_u16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vbsl_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
@test_vbsl_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 { 1185 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 1186 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 1187 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8> 1188 // CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4 1189 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x i32> 1190 // CHECK: ret <2 x i32> [[TMP3]] 1191 uint32x2_t test_vbsl_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) { 1192 return vbsl_u32(a, b, c); 1193 } 1194 1195 // CHECK-LABEL: define <1 x i64> @test_vbsl_u64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) #0 { 1196 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 1197 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 1198 // CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8> 1199 // CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4 1200 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <1 x i64> 1201 // CHECK: ret <1 x i64> [[TMP3]] 1202 uint64x1_t test_vbsl_u64(uint64x1_t a, uint64x1_t b, uint64x1_t c) { 1203 return vbsl_u64(a, b, c); 1204 } 1205 1206 // CHECK-LABEL: define <2 x float> @test_vbsl_f32(<2 x i32> %a, <2 x float> %b, <2 x float> %c) #0 { 1207 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 1208 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 1209 // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8> 1210 // CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4 1211 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x float> 1212 // CHECK: ret <2 x float> [[TMP3]] 1213 float32x2_t test_vbsl_f32(uint32x2_t a, float32x2_t b, float32x2_t c) { 1214 return vbsl_f32(a, b, c); 1215 } 1216 1217 // CHECK-LABEL: define <8 x i8> @test_vbsl_p8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 { 1218 // CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4 1219 // CHECK: ret <8 x i8> [[VBSL_V_I]] 1220 poly8x8_t test_vbsl_p8(uint8x8_t a, poly8x8_t b, poly8x8_t c) { 1221 return vbsl_p8(a, b, c); 1222 } 1223 1224 // CHECK-LABEL: define <4 x i16> @test_vbsl_p16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 { 1225 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 1226 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 1227 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> 1228 // CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) #4 1229 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16> 1230 // CHECK: ret <4 x i16> [[TMP3]] 1231 poly16x4_t test_vbsl_p16(uint16x4_t a, poly16x4_t b, poly16x4_t c) { 1232 return vbsl_p16(a, b, c); 1233 } 1234 1235 // CHECK-LABEL: define <16 x i8> @test_vbslq_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 { 1236 // CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #4 1237 // CHECK: ret <16 x i8> [[VBSLQ_V_I]] 1238 int8x16_t test_vbslq_s8(uint8x16_t a, int8x16_t b, int8x16_t c) { 1239 return vbslq_s8(a, b, c); 1240 } 1241 1242 // CHECK-LABEL: define <8 x i16> @test_vbslq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 { 1243 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 1244 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 1245 
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8> 1246 // CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4 1247 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16> 1248 // CHECK: ret <8 x i16> [[TMP3]] 1249 int16x8_t test_vbslq_s16(uint16x8_t a, int16x8_t b, int16x8_t c) { 1250 return vbslq_s16(a, b, c); 1251 } 1252 1253 // CHECK-LABEL: define <4 x i32> @test_vbslq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 { 1254 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 1255 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 1256 // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %c to <16 x i8> 1257 // CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4 1258 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x i32> 1259 // CHECK: ret <4 x i32> [[TMP3]] 1260 int32x4_t test_vbslq_s32(uint32x4_t a, int32x4_t b, int32x4_t c) { 1261 return vbslq_s32(a, b, c); 1262 } 1263 1264 // CHECK-LABEL: define <2 x i64> @test_vbslq_s64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) #0 { 1265 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 1266 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 1267 // CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8> 1268 // CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4 1269 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <2 x i64> 1270 // CHECK: ret <2 x i64> [[TMP3]] 1271 int64x2_t test_vbslq_s64(uint64x2_t a, int64x2_t b, int64x2_t c) { 1272 return vbslq_s64(a, b, c); 1273 } 1274 1275 // CHECK-LABEL: define <16 x i8> @test_vbslq_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 { 1276 // CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #4 1277 // CHECK: ret <16 x i8> [[VBSLQ_V_I]] 1278 uint8x16_t test_vbslq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) { 1279 return vbslq_u8(a, b, c); 1280 } 1281 1282 // CHECK-LABEL: define <8 x i16> @test_vbslq_u16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 { 1283 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 1284 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 1285 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8> 1286 // CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4 1287 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16> 1288 // CHECK: ret <8 x i16> [[TMP3]] 1289 uint16x8_t test_vbslq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) { 1290 return vbslq_u16(a, b, c); 1291 } 1292 1293 // CHECK-LABEL: define <4 x i32> @test_vbslq_u32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 { 1294 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 1295 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 1296 // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %c to <16 x i8> 1297 // CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4 1298 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x i32> 1299 // CHECK: ret <4 x i32> [[TMP3]] 1300 uint32x4_t test_vbslq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) { 1301 return vbslq_u32(a, b, c); 1302 } 1303 1304 // CHECK-LABEL: define <2 x i64> @test_vbslq_u64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) 
#0 { 1305 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 1306 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 1307 // CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8> 1308 // CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4 1309 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <2 x i64> 1310 // CHECK: ret <2 x i64> [[TMP3]] 1311 uint64x2_t test_vbslq_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c) { 1312 return vbslq_u64(a, b, c); 1313 } 1314 1315 // CHECK-LABEL: define <4 x float> @test_vbslq_f32(<4 x i32> %a, <4 x float> %b, <4 x float> %c) #0 { 1316 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 1317 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 1318 // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8> 1319 // CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4 1320 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x float> 1321 // CHECK: ret <4 x float> [[TMP3]] 1322 float32x4_t test_vbslq_f32(uint32x4_t a, float32x4_t b, float32x4_t c) { 1323 return vbslq_f32(a, b, c); 1324 } 1325 1326 // CHECK-LABEL: define <16 x i8> @test_vbslq_p8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 { 1327 // CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #4 1328 // CHECK: ret <16 x i8> [[VBSLQ_V_I]] 1329 poly8x16_t test_vbslq_p8(uint8x16_t a, poly8x16_t b, poly8x16_t c) { 1330 return vbslq_p8(a, b, c); 1331 } 1332 1333 // CHECK-LABEL: define <8 x i16> @test_vbslq_p16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 { 1334 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 1335 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 1336 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8> 1337 // CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) #4 1338 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16> 1339 // CHECK: ret <8 x i16> [[TMP3]] 1340 poly16x8_t test_vbslq_p16(uint16x8_t a, poly16x8_t b, poly16x8_t c) { 1341 return vbslq_p16(a, b, c); 1342 } 1343 1344 1345 // CHECK-LABEL: define <2 x i32> @test_vcage_f32(<2 x float> %a, <2 x float> %b) #0 { 1346 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 1347 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 1348 // CHECK: [[VCAGE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 1349 // CHECK: [[VCAGE_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 1350 // CHECK: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> [[VCAGE_V_I]], <2 x float> [[VCAGE_V1_I]]) #4 1351 // CHECK: ret <2 x i32> [[VCAGE_V2_I]] 1352 uint32x2_t test_vcage_f32(float32x2_t a, float32x2_t b) { 1353 return vcage_f32(a, b); 1354 } 1355 1356 // CHECK-LABEL: define <4 x i32> @test_vcageq_f32(<4 x float> %a, <4 x float> %b) #0 { 1357 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 1358 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 1359 // CHECK: [[VCAGEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 1360 // CHECK: [[VCAGEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 1361 // CHECK: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> [[VCAGEQ_V_I]], <4 x float> [[VCAGEQ_V1_I]]) #4 1362 // CHECK: ret <4 x i32> [[VCAGEQ_V2_I]] 1363 uint32x4_t 
test_vcageq_f32(float32x4_t a, float32x4_t b) { 1364 return vcageq_f32(a, b); 1365 } 1366 1367 1368 // CHECK-LABEL: define <2 x i32> @test_vcagt_f32(<2 x float> %a, <2 x float> %b) #0 { 1369 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 1370 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 1371 // CHECK: [[VCAGT_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 1372 // CHECK: [[VCAGT_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 1373 // CHECK: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> [[VCAGT_V_I]], <2 x float> [[VCAGT_V1_I]]) #4 1374 // CHECK: ret <2 x i32> [[VCAGT_V2_I]] 1375 uint32x2_t test_vcagt_f32(float32x2_t a, float32x2_t b) { 1376 return vcagt_f32(a, b); 1377 } 1378 1379 // CHECK-LABEL: define <4 x i32> @test_vcagtq_f32(<4 x float> %a, <4 x float> %b) #0 { 1380 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 1381 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 1382 // CHECK: [[VCAGTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 1383 // CHECK: [[VCAGTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 1384 // CHECK: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> [[VCAGTQ_V_I]], <4 x float> [[VCAGTQ_V1_I]]) #4 1385 // CHECK: ret <4 x i32> [[VCAGTQ_V2_I]] 1386 uint32x4_t test_vcagtq_f32(float32x4_t a, float32x4_t b) { 1387 return vcagtq_f32(a, b); 1388 } 1389 1390 1391 // CHECK-LABEL: define <2 x i32> @test_vcale_f32(<2 x float> %a, <2 x float> %b) #0 { 1392 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 1393 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 1394 // CHECK: [[VCALE_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 1395 // CHECK: [[VCALE_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 1396 // CHECK: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> [[VCALE_V_I]], <2 x float> [[VCALE_V1_I]]) #4 1397 // CHECK: ret <2 x i32> [[VCALE_V2_I]] 1398 uint32x2_t test_vcale_f32(float32x2_t a, float32x2_t b) { 1399 return vcale_f32(a, b); 1400 } 1401 1402 // CHECK-LABEL: define <4 x i32> @test_vcaleq_f32(<4 x float> %a, <4 x float> %b) #0 { 1403 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 1404 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 1405 // CHECK: [[VCALEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 1406 // CHECK: [[VCALEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 1407 // CHECK: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> [[VCALEQ_V_I]], <4 x float> [[VCALEQ_V1_I]]) #4 1408 // CHECK: ret <4 x i32> [[VCALEQ_V2_I]] 1409 uint32x4_t test_vcaleq_f32(float32x4_t a, float32x4_t b) { 1410 return vcaleq_f32(a, b); 1411 } 1412 1413 1414 // CHECK-LABEL: define <2 x i32> @test_vcalt_f32(<2 x float> %a, <2 x float> %b) #0 { 1415 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 1416 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 1417 // CHECK: [[VCALT_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 1418 // CHECK: [[VCALT_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 1419 // CHECK: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> [[VCALT_V_I]], <2 x float> [[VCALT_V1_I]]) #4 1420 // CHECK: ret <2 x i32> [[VCALT_V2_I]] 1421 uint32x2_t test_vcalt_f32(float32x2_t a, float32x2_t b) { 1422 return vcalt_f32(a, b); 1423 } 1424 1425 // CHECK-LABEL: define <4 x i32> @test_vcaltq_f32(<4 x float> %a, <4 x float> %b) #0 { 1426 // 
CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 1427 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 1428 // CHECK: [[VCALTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 1429 // CHECK: [[VCALTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 1430 // CHECK: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> [[VCALTQ_V_I]], <4 x float> [[VCALTQ_V1_I]]) #4 1431 // CHECK: ret <4 x i32> [[VCALTQ_V2_I]] 1432 uint32x4_t test_vcaltq_f32(float32x4_t a, float32x4_t b) { 1433 return vcaltq_f32(a, b); 1434 } 1435 1436 1437 // CHECK-LABEL: define <8 x i8> @test_vceq_s8(<8 x i8> %a, <8 x i8> %b) #0 { 1438 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b 1439 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 1440 // CHECK: ret <8 x i8> [[SEXT_I]] 1441 uint8x8_t test_vceq_s8(int8x8_t a, int8x8_t b) { 1442 return vceq_s8(a, b); 1443 } 1444 1445 // CHECK-LABEL: define <4 x i16> @test_vceq_s16(<4 x i16> %a, <4 x i16> %b) #0 { 1446 // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %a, %b 1447 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 1448 // CHECK: ret <4 x i16> [[SEXT_I]] 1449 uint16x4_t test_vceq_s16(int16x4_t a, int16x4_t b) { 1450 return vceq_s16(a, b); 1451 } 1452 1453 // CHECK-LABEL: define <2 x i32> @test_vceq_s32(<2 x i32> %a, <2 x i32> %b) #0 { 1454 // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %a, %b 1455 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1456 // CHECK: ret <2 x i32> [[SEXT_I]] 1457 uint32x2_t test_vceq_s32(int32x2_t a, int32x2_t b) { 1458 return vceq_s32(a, b); 1459 } 1460 1461 // CHECK-LABEL: define <2 x i32> @test_vceq_f32(<2 x float> %a, <2 x float> %b) #0 { 1462 // CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x float> %a, %b 1463 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1464 // CHECK: ret <2 x i32> [[SEXT_I]] 1465 uint32x2_t test_vceq_f32(float32x2_t a, float32x2_t b) { 1466 return vceq_f32(a, b); 1467 } 1468 1469 // CHECK-LABEL: define <8 x i8> @test_vceq_u8(<8 x i8> %a, <8 x i8> %b) #0 { 1470 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b 1471 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 1472 // CHECK: ret <8 x i8> [[SEXT_I]] 1473 uint8x8_t test_vceq_u8(uint8x8_t a, uint8x8_t b) { 1474 return vceq_u8(a, b); 1475 } 1476 1477 // CHECK-LABEL: define <4 x i16> @test_vceq_u16(<4 x i16> %a, <4 x i16> %b) #0 { 1478 // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %a, %b 1479 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 1480 // CHECK: ret <4 x i16> [[SEXT_I]] 1481 uint16x4_t test_vceq_u16(uint16x4_t a, uint16x4_t b) { 1482 return vceq_u16(a, b); 1483 } 1484 1485 // CHECK-LABEL: define <2 x i32> @test_vceq_u32(<2 x i32> %a, <2 x i32> %b) #0 { 1486 // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %a, %b 1487 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1488 // CHECK: ret <2 x i32> [[SEXT_I]] 1489 uint32x2_t test_vceq_u32(uint32x2_t a, uint32x2_t b) { 1490 return vceq_u32(a, b); 1491 } 1492 1493 // CHECK-LABEL: define <8 x i8> @test_vceq_p8(<8 x i8> %a, <8 x i8> %b) #0 { 1494 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b 1495 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 1496 // CHECK: ret <8 x i8> [[SEXT_I]] 1497 uint8x8_t test_vceq_p8(poly8x8_t a, poly8x8_t b) { 1498 return vceq_p8(a, b); 1499 } 1500 1501 // CHECK-LABEL: define <16 x i8> @test_vceqq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 1502 // CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b 1503 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> 
[[CMP_I]] to <16 x i8> 1504 // CHECK: ret <16 x i8> [[SEXT_I]] 1505 uint8x16_t test_vceqq_s8(int8x16_t a, int8x16_t b) { 1506 return vceqq_s8(a, b); 1507 } 1508 1509 // CHECK-LABEL: define <8 x i16> @test_vceqq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 1510 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %a, %b 1511 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 1512 // CHECK: ret <8 x i16> [[SEXT_I]] 1513 uint16x8_t test_vceqq_s16(int16x8_t a, int16x8_t b) { 1514 return vceqq_s16(a, b); 1515 } 1516 1517 // CHECK-LABEL: define <4 x i32> @test_vceqq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 1518 // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %a, %b 1519 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1520 // CHECK: ret <4 x i32> [[SEXT_I]] 1521 uint32x4_t test_vceqq_s32(int32x4_t a, int32x4_t b) { 1522 return vceqq_s32(a, b); 1523 } 1524 1525 // CHECK-LABEL: define <4 x i32> @test_vceqq_f32(<4 x float> %a, <4 x float> %b) #0 { 1526 // CHECK: [[CMP_I:%.*]] = fcmp oeq <4 x float> %a, %b 1527 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1528 // CHECK: ret <4 x i32> [[SEXT_I]] 1529 uint32x4_t test_vceqq_f32(float32x4_t a, float32x4_t b) { 1530 return vceqq_f32(a, b); 1531 } 1532 1533 // CHECK-LABEL: define <16 x i8> @test_vceqq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 1534 // CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b 1535 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 1536 // CHECK: ret <16 x i8> [[SEXT_I]] 1537 uint8x16_t test_vceqq_u8(uint8x16_t a, uint8x16_t b) { 1538 return vceqq_u8(a, b); 1539 } 1540 1541 // CHECK-LABEL: define <8 x i16> @test_vceqq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 1542 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %a, %b 1543 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 1544 // CHECK: ret <8 x i16> [[SEXT_I]] 1545 uint16x8_t test_vceqq_u16(uint16x8_t a, uint16x8_t b) { 1546 return vceqq_u16(a, b); 1547 } 1548 1549 // CHECK-LABEL: define <4 x i32> @test_vceqq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 1550 // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %a, %b 1551 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1552 // CHECK: ret <4 x i32> [[SEXT_I]] 1553 uint32x4_t test_vceqq_u32(uint32x4_t a, uint32x4_t b) { 1554 return vceqq_u32(a, b); 1555 } 1556 1557 // CHECK-LABEL: define <16 x i8> @test_vceqq_p8(<16 x i8> %a, <16 x i8> %b) #0 { 1558 // CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b 1559 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 1560 // CHECK: ret <16 x i8> [[SEXT_I]] 1561 uint8x16_t test_vceqq_p8(poly8x16_t a, poly8x16_t b) { 1562 return vceqq_p8(a, b); 1563 } 1564 1565 1566 // CHECK-LABEL: define <8 x i8> @test_vcge_s8(<8 x i8> %a, <8 x i8> %b) #0 { 1567 // CHECK: [[CMP_I:%.*]] = icmp sge <8 x i8> %a, %b 1568 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 1569 // CHECK: ret <8 x i8> [[SEXT_I]] 1570 uint8x8_t test_vcge_s8(int8x8_t a, int8x8_t b) { 1571 return vcge_s8(a, b); 1572 } 1573 1574 // CHECK-LABEL: define <4 x i16> @test_vcge_s16(<4 x i16> %a, <4 x i16> %b) #0 { 1575 // CHECK: [[CMP_I:%.*]] = icmp sge <4 x i16> %a, %b 1576 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 1577 // CHECK: ret <4 x i16> [[SEXT_I]] 1578 uint16x4_t test_vcge_s16(int16x4_t a, int16x4_t b) { 1579 return vcge_s16(a, b); 1580 } 1581 1582 // CHECK-LABEL: define <2 x i32> @test_vcge_s32(<2 x i32> %a, <2 x i32> %b) #0 { 1583 // CHECK: [[CMP_I:%.*]] = icmp sge <2 x i32> %a, %b 1584 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1585 // 
CHECK: ret <2 x i32> [[SEXT_I]] 1586 uint32x2_t test_vcge_s32(int32x2_t a, int32x2_t b) { 1587 return vcge_s32(a, b); 1588 } 1589 1590 // CHECK-LABEL: define <2 x i32> @test_vcge_f32(<2 x float> %a, <2 x float> %b) #0 { 1591 // CHECK: [[CMP_I:%.*]] = fcmp oge <2 x float> %a, %b 1592 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1593 // CHECK: ret <2 x i32> [[SEXT_I]] 1594 uint32x2_t test_vcge_f32(float32x2_t a, float32x2_t b) { 1595 return vcge_f32(a, b); 1596 } 1597 1598 // CHECK-LABEL: define <8 x i8> @test_vcge_u8(<8 x i8> %a, <8 x i8> %b) #0 { 1599 // CHECK: [[CMP_I:%.*]] = icmp uge <8 x i8> %a, %b 1600 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 1601 // CHECK: ret <8 x i8> [[SEXT_I]] 1602 uint8x8_t test_vcge_u8(uint8x8_t a, uint8x8_t b) { 1603 return vcge_u8(a, b); 1604 } 1605 1606 // CHECK-LABEL: define <4 x i16> @test_vcge_u16(<4 x i16> %a, <4 x i16> %b) #0 { 1607 // CHECK: [[CMP_I:%.*]] = icmp uge <4 x i16> %a, %b 1608 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 1609 // CHECK: ret <4 x i16> [[SEXT_I]] 1610 uint16x4_t test_vcge_u16(uint16x4_t a, uint16x4_t b) { 1611 return vcge_u16(a, b); 1612 } 1613 1614 // CHECK-LABEL: define <2 x i32> @test_vcge_u32(<2 x i32> %a, <2 x i32> %b) #0 { 1615 // CHECK: [[CMP_I:%.*]] = icmp uge <2 x i32> %a, %b 1616 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1617 // CHECK: ret <2 x i32> [[SEXT_I]] 1618 uint32x2_t test_vcge_u32(uint32x2_t a, uint32x2_t b) { 1619 return vcge_u32(a, b); 1620 } 1621 1622 // CHECK-LABEL: define <16 x i8> @test_vcgeq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 1623 // CHECK: [[CMP_I:%.*]] = icmp sge <16 x i8> %a, %b 1624 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 1625 // CHECK: ret <16 x i8> [[SEXT_I]] 1626 uint8x16_t test_vcgeq_s8(int8x16_t a, int8x16_t b) { 1627 return vcgeq_s8(a, b); 1628 } 1629 1630 // CHECK-LABEL: define <8 x i16> @test_vcgeq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 1631 // CHECK: [[CMP_I:%.*]] = icmp sge <8 x i16> %a, %b 1632 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 1633 // CHECK: ret <8 x i16> [[SEXT_I]] 1634 uint16x8_t test_vcgeq_s16(int16x8_t a, int16x8_t b) { 1635 return vcgeq_s16(a, b); 1636 } 1637 1638 // CHECK-LABEL: define <4 x i32> @test_vcgeq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 1639 // CHECK: [[CMP_I:%.*]] = icmp sge <4 x i32> %a, %b 1640 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1641 // CHECK: ret <4 x i32> [[SEXT_I]] 1642 uint32x4_t test_vcgeq_s32(int32x4_t a, int32x4_t b) { 1643 return vcgeq_s32(a, b); 1644 } 1645 1646 // CHECK-LABEL: define <4 x i32> @test_vcgeq_f32(<4 x float> %a, <4 x float> %b) #0 { 1647 // CHECK: [[CMP_I:%.*]] = fcmp oge <4 x float> %a, %b 1648 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1649 // CHECK: ret <4 x i32> [[SEXT_I]] 1650 uint32x4_t test_vcgeq_f32(float32x4_t a, float32x4_t b) { 1651 return vcgeq_f32(a, b); 1652 } 1653 1654 // CHECK-LABEL: define <16 x i8> @test_vcgeq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 1655 // CHECK: [[CMP_I:%.*]] = icmp uge <16 x i8> %a, %b 1656 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 1657 // CHECK: ret <16 x i8> [[SEXT_I]] 1658 uint8x16_t test_vcgeq_u8(uint8x16_t a, uint8x16_t b) { 1659 return vcgeq_u8(a, b); 1660 } 1661 1662 // CHECK-LABEL: define <8 x i16> @test_vcgeq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 1663 // CHECK: [[CMP_I:%.*]] = icmp uge <8 x i16> %a, %b 1664 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 1665 // CHECK: ret <8 x i16> 
[[SEXT_I]] 1666 uint16x8_t test_vcgeq_u16(uint16x8_t a, uint16x8_t b) { 1667 return vcgeq_u16(a, b); 1668 } 1669 1670 // CHECK-LABEL: define <4 x i32> @test_vcgeq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 1671 // CHECK: [[CMP_I:%.*]] = icmp uge <4 x i32> %a, %b 1672 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1673 // CHECK: ret <4 x i32> [[SEXT_I]] 1674 uint32x4_t test_vcgeq_u32(uint32x4_t a, uint32x4_t b) { 1675 return vcgeq_u32(a, b); 1676 } 1677 1678 1679 // CHECK-LABEL: define <8 x i8> @test_vcgt_s8(<8 x i8> %a, <8 x i8> %b) #0 { 1680 // CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i8> %a, %b 1681 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 1682 // CHECK: ret <8 x i8> [[SEXT_I]] 1683 uint8x8_t test_vcgt_s8(int8x8_t a, int8x8_t b) { 1684 return vcgt_s8(a, b); 1685 } 1686 1687 // CHECK-LABEL: define <4 x i16> @test_vcgt_s16(<4 x i16> %a, <4 x i16> %b) #0 { 1688 // CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i16> %a, %b 1689 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 1690 // CHECK: ret <4 x i16> [[SEXT_I]] 1691 uint16x4_t test_vcgt_s16(int16x4_t a, int16x4_t b) { 1692 return vcgt_s16(a, b); 1693 } 1694 1695 // CHECK-LABEL: define <2 x i32> @test_vcgt_s32(<2 x i32> %a, <2 x i32> %b) #0 { 1696 // CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i32> %a, %b 1697 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1698 // CHECK: ret <2 x i32> [[SEXT_I]] 1699 uint32x2_t test_vcgt_s32(int32x2_t a, int32x2_t b) { 1700 return vcgt_s32(a, b); 1701 } 1702 1703 // CHECK-LABEL: define <2 x i32> @test_vcgt_f32(<2 x float> %a, <2 x float> %b) #0 { 1704 // CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x float> %a, %b 1705 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1706 // CHECK: ret <2 x i32> [[SEXT_I]] 1707 uint32x2_t test_vcgt_f32(float32x2_t a, float32x2_t b) { 1708 return vcgt_f32(a, b); 1709 } 1710 1711 // CHECK-LABEL: define <8 x i8> @test_vcgt_u8(<8 x i8> %a, <8 x i8> %b) #0 { 1712 // CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i8> %a, %b 1713 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 1714 // CHECK: ret <8 x i8> [[SEXT_I]] 1715 uint8x8_t test_vcgt_u8(uint8x8_t a, uint8x8_t b) { 1716 return vcgt_u8(a, b); 1717 } 1718 1719 // CHECK-LABEL: define <4 x i16> @test_vcgt_u16(<4 x i16> %a, <4 x i16> %b) #0 { 1720 // CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i16> %a, %b 1721 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 1722 // CHECK: ret <4 x i16> [[SEXT_I]] 1723 uint16x4_t test_vcgt_u16(uint16x4_t a, uint16x4_t b) { 1724 return vcgt_u16(a, b); 1725 } 1726 1727 // CHECK-LABEL: define <2 x i32> @test_vcgt_u32(<2 x i32> %a, <2 x i32> %b) #0 { 1728 // CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i32> %a, %b 1729 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1730 // CHECK: ret <2 x i32> [[SEXT_I]] 1731 uint32x2_t test_vcgt_u32(uint32x2_t a, uint32x2_t b) { 1732 return vcgt_u32(a, b); 1733 } 1734 1735 // CHECK-LABEL: define <16 x i8> @test_vcgtq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 1736 // CHECK: [[CMP_I:%.*]] = icmp sgt <16 x i8> %a, %b 1737 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 1738 // CHECK: ret <16 x i8> [[SEXT_I]] 1739 uint8x16_t test_vcgtq_s8(int8x16_t a, int8x16_t b) { 1740 return vcgtq_s8(a, b); 1741 } 1742 1743 // CHECK-LABEL: define <8 x i16> @test_vcgtq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 1744 // CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i16> %a, %b 1745 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 1746 // CHECK: ret <8 x i16> [[SEXT_I]] 1747 uint16x8_t 
test_vcgtq_s16(int16x8_t a, int16x8_t b) { 1748 return vcgtq_s16(a, b); 1749 } 1750 1751 // CHECK-LABEL: define <4 x i32> @test_vcgtq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 1752 // CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i32> %a, %b 1753 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1754 // CHECK: ret <4 x i32> [[SEXT_I]] 1755 uint32x4_t test_vcgtq_s32(int32x4_t a, int32x4_t b) { 1756 return vcgtq_s32(a, b); 1757 } 1758 1759 // CHECK-LABEL: define <4 x i32> @test_vcgtq_f32(<4 x float> %a, <4 x float> %b) #0 { 1760 // CHECK: [[CMP_I:%.*]] = fcmp ogt <4 x float> %a, %b 1761 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1762 // CHECK: ret <4 x i32> [[SEXT_I]] 1763 uint32x4_t test_vcgtq_f32(float32x4_t a, float32x4_t b) { 1764 return vcgtq_f32(a, b); 1765 } 1766 1767 // CHECK-LABEL: define <16 x i8> @test_vcgtq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 1768 // CHECK: [[CMP_I:%.*]] = icmp ugt <16 x i8> %a, %b 1769 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 1770 // CHECK: ret <16 x i8> [[SEXT_I]] 1771 uint8x16_t test_vcgtq_u8(uint8x16_t a, uint8x16_t b) { 1772 return vcgtq_u8(a, b); 1773 } 1774 1775 // CHECK-LABEL: define <8 x i16> @test_vcgtq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 1776 // CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i16> %a, %b 1777 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 1778 // CHECK: ret <8 x i16> [[SEXT_I]] 1779 uint16x8_t test_vcgtq_u16(uint16x8_t a, uint16x8_t b) { 1780 return vcgtq_u16(a, b); 1781 } 1782 1783 // CHECK-LABEL: define <4 x i32> @test_vcgtq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 1784 // CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i32> %a, %b 1785 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1786 // CHECK: ret <4 x i32> [[SEXT_I]] 1787 uint32x4_t test_vcgtq_u32(uint32x4_t a, uint32x4_t b) { 1788 return vcgtq_u32(a, b); 1789 } 1790 1791 1792 // CHECK-LABEL: define <8 x i8> @test_vcle_s8(<8 x i8> %a, <8 x i8> %b) #0 { 1793 // CHECK: [[CMP_I:%.*]] = icmp sle <8 x i8> %a, %b 1794 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 1795 // CHECK: ret <8 x i8> [[SEXT_I]] 1796 uint8x8_t test_vcle_s8(int8x8_t a, int8x8_t b) { 1797 return vcle_s8(a, b); 1798 } 1799 1800 // CHECK-LABEL: define <4 x i16> @test_vcle_s16(<4 x i16> %a, <4 x i16> %b) #0 { 1801 // CHECK: [[CMP_I:%.*]] = icmp sle <4 x i16> %a, %b 1802 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 1803 // CHECK: ret <4 x i16> [[SEXT_I]] 1804 uint16x4_t test_vcle_s16(int16x4_t a, int16x4_t b) { 1805 return vcle_s16(a, b); 1806 } 1807 1808 // CHECK-LABEL: define <2 x i32> @test_vcle_s32(<2 x i32> %a, <2 x i32> %b) #0 { 1809 // CHECK: [[CMP_I:%.*]] = icmp sle <2 x i32> %a, %b 1810 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1811 // CHECK: ret <2 x i32> [[SEXT_I]] 1812 uint32x2_t test_vcle_s32(int32x2_t a, int32x2_t b) { 1813 return vcle_s32(a, b); 1814 } 1815 1816 // CHECK-LABEL: define <2 x i32> @test_vcle_f32(<2 x float> %a, <2 x float> %b) #0 { 1817 // CHECK: [[CMP_I:%.*]] = fcmp ole <2 x float> %a, %b 1818 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1819 // CHECK: ret <2 x i32> [[SEXT_I]] 1820 uint32x2_t test_vcle_f32(float32x2_t a, float32x2_t b) { 1821 return vcle_f32(a, b); 1822 } 1823 1824 // CHECK-LABEL: define <8 x i8> @test_vcle_u8(<8 x i8> %a, <8 x i8> %b) #0 { 1825 // CHECK: [[CMP_I:%.*]] = icmp ule <8 x i8> %a, %b 1826 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 1827 // CHECK: ret <8 x i8> [[SEXT_I]] 1828 uint8x8_t test_vcle_u8(uint8x8_t a, 
uint8x8_t b) { 1829 return vcle_u8(a, b); 1830 } 1831 1832 // CHECK-LABEL: define <4 x i16> @test_vcle_u16(<4 x i16> %a, <4 x i16> %b) #0 { 1833 // CHECK: [[CMP_I:%.*]] = icmp ule <4 x i16> %a, %b 1834 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 1835 // CHECK: ret <4 x i16> [[SEXT_I]] 1836 uint16x4_t test_vcle_u16(uint16x4_t a, uint16x4_t b) { 1837 return vcle_u16(a, b); 1838 } 1839 1840 // CHECK-LABEL: define <2 x i32> @test_vcle_u32(<2 x i32> %a, <2 x i32> %b) #0 { 1841 // CHECK: [[CMP_I:%.*]] = icmp ule <2 x i32> %a, %b 1842 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1843 // CHECK: ret <2 x i32> [[SEXT_I]] 1844 uint32x2_t test_vcle_u32(uint32x2_t a, uint32x2_t b) { 1845 return vcle_u32(a, b); 1846 } 1847 1848 // CHECK-LABEL: define <16 x i8> @test_vcleq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 1849 // CHECK: [[CMP_I:%.*]] = icmp sle <16 x i8> %a, %b 1850 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 1851 // CHECK: ret <16 x i8> [[SEXT_I]] 1852 uint8x16_t test_vcleq_s8(int8x16_t a, int8x16_t b) { 1853 return vcleq_s8(a, b); 1854 } 1855 1856 // CHECK-LABEL: define <8 x i16> @test_vcleq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 1857 // CHECK: [[CMP_I:%.*]] = icmp sle <8 x i16> %a, %b 1858 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 1859 // CHECK: ret <8 x i16> [[SEXT_I]] 1860 uint16x8_t test_vcleq_s16(int16x8_t a, int16x8_t b) { 1861 return vcleq_s16(a, b); 1862 } 1863 1864 // CHECK-LABEL: define <4 x i32> @test_vcleq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 1865 // CHECK: [[CMP_I:%.*]] = icmp sle <4 x i32> %a, %b 1866 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1867 // CHECK: ret <4 x i32> [[SEXT_I]] 1868 uint32x4_t test_vcleq_s32(int32x4_t a, int32x4_t b) { 1869 return vcleq_s32(a, b); 1870 } 1871 1872 // CHECK-LABEL: define <4 x i32> @test_vcleq_f32(<4 x float> %a, <4 x float> %b) #0 { 1873 // CHECK: [[CMP_I:%.*]] = fcmp ole <4 x float> %a, %b 1874 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1875 // CHECK: ret <4 x i32> [[SEXT_I]] 1876 uint32x4_t test_vcleq_f32(float32x4_t a, float32x4_t b) { 1877 return vcleq_f32(a, b); 1878 } 1879 1880 // CHECK-LABEL: define <16 x i8> @test_vcleq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 1881 // CHECK: [[CMP_I:%.*]] = icmp ule <16 x i8> %a, %b 1882 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 1883 // CHECK: ret <16 x i8> [[SEXT_I]] 1884 uint8x16_t test_vcleq_u8(uint8x16_t a, uint8x16_t b) { 1885 return vcleq_u8(a, b); 1886 } 1887 1888 // CHECK-LABEL: define <8 x i16> @test_vcleq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 1889 // CHECK: [[CMP_I:%.*]] = icmp ule <8 x i16> %a, %b 1890 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 1891 // CHECK: ret <8 x i16> [[SEXT_I]] 1892 uint16x8_t test_vcleq_u16(uint16x8_t a, uint16x8_t b) { 1893 return vcleq_u16(a, b); 1894 } 1895 1896 // CHECK-LABEL: define <4 x i32> @test_vcleq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 1897 // CHECK: [[CMP_I:%.*]] = icmp ule <4 x i32> %a, %b 1898 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1899 // CHECK: ret <4 x i32> [[SEXT_I]] 1900 uint32x4_t test_vcleq_u32(uint32x4_t a, uint32x4_t b) { 1901 return vcleq_u32(a, b); 1902 } 1903 1904 1905 // CHECK-LABEL: define <8 x i8> @test_vcls_s8(<8 x i8> %a) #0 { 1906 // CHECK: [[VCLS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %a) #4 1907 // CHECK: ret <8 x i8> [[VCLS_V_I]] 1908 int8x8_t test_vcls_s8(int8x8_t a) { 1909 return vcls_s8(a); 1910 } 1911 1912 // CHECK-LABEL: define <4 x 
i16> @test_vcls_s16(<4 x i16> %a) #0 { 1913 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 1914 // CHECK: [[VCLS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1915 // CHECK: [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> [[VCLS_V_I]]) #4 1916 // CHECK: [[VCLS_V2_I:%.*]] = bitcast <4 x i16> [[VCLS_V1_I]] to <8 x i8> 1917 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLS_V2_I]] to <4 x i16> 1918 // CHECK: ret <4 x i16> [[TMP1]] 1919 int16x4_t test_vcls_s16(int16x4_t a) { 1920 return vcls_s16(a); 1921 } 1922 1923 // CHECK-LABEL: define <2 x i32> @test_vcls_s32(<2 x i32> %a) #0 { 1924 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 1925 // CHECK: [[VCLS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1926 // CHECK: [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> [[VCLS_V_I]]) #4 1927 // CHECK: [[VCLS_V2_I:%.*]] = bitcast <2 x i32> [[VCLS_V1_I]] to <8 x i8> 1928 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLS_V2_I]] to <2 x i32> 1929 // CHECK: ret <2 x i32> [[TMP1]] 1930 int32x2_t test_vcls_s32(int32x2_t a) { 1931 return vcls_s32(a); 1932 } 1933 1934 // CHECK-LABEL: define <16 x i8> @test_vclsq_s8(<16 x i8> %a) #0 { 1935 // CHECK: [[VCLSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %a) #4 1936 // CHECK: ret <16 x i8> [[VCLSQ_V_I]] 1937 int8x16_t test_vclsq_s8(int8x16_t a) { 1938 return vclsq_s8(a); 1939 } 1940 1941 // CHECK-LABEL: define <8 x i16> @test_vclsq_s16(<8 x i16> %a) #0 { 1942 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 1943 // CHECK: [[VCLSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 1944 // CHECK: [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> [[VCLSQ_V_I]]) #4 1945 // CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLSQ_V1_I]] to <16 x i8> 1946 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLSQ_V2_I]] to <8 x i16> 1947 // CHECK: ret <8 x i16> [[TMP1]] 1948 int16x8_t test_vclsq_s16(int16x8_t a) { 1949 return vclsq_s16(a); 1950 } 1951 1952 // CHECK-LABEL: define <4 x i32> @test_vclsq_s32(<4 x i32> %a) #0 { 1953 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 1954 // CHECK: [[VCLSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 1955 // CHECK: [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> [[VCLSQ_V_I]]) #4 1956 // CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLSQ_V1_I]] to <16 x i8> 1957 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLSQ_V2_I]] to <4 x i32> 1958 // CHECK: ret <4 x i32> [[TMP1]] 1959 int32x4_t test_vclsq_s32(int32x4_t a) { 1960 return vclsq_s32(a); 1961 } 1962 1963 1964 // CHECK-LABEL: define <8 x i8> @test_vclt_s8(<8 x i8> %a, <8 x i8> %b) #0 { 1965 // CHECK: [[CMP_I:%.*]] = icmp slt <8 x i8> %a, %b 1966 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 1967 // CHECK: ret <8 x i8> [[SEXT_I]] 1968 uint8x8_t test_vclt_s8(int8x8_t a, int8x8_t b) { 1969 return vclt_s8(a, b); 1970 } 1971 1972 // CHECK-LABEL: define <4 x i16> @test_vclt_s16(<4 x i16> %a, <4 x i16> %b) #0 { 1973 // CHECK: [[CMP_I:%.*]] = icmp slt <4 x i16> %a, %b 1974 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 1975 // CHECK: ret <4 x i16> [[SEXT_I]] 1976 uint16x4_t test_vclt_s16(int16x4_t a, int16x4_t b) { 1977 return vclt_s16(a, b); 1978 } 1979 1980 // CHECK-LABEL: define <2 x i32> @test_vclt_s32(<2 x i32> %a, <2 x i32> %b) #0 { 1981 // CHECK: [[CMP_I:%.*]] = icmp slt <2 x i32> %a, %b 1982 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1983 // CHECK: ret <2 x i32> 
[[SEXT_I]] 1984 uint32x2_t test_vclt_s32(int32x2_t a, int32x2_t b) { 1985 return vclt_s32(a, b); 1986 } 1987 1988 // CHECK-LABEL: define <2 x i32> @test_vclt_f32(<2 x float> %a, <2 x float> %b) #0 { 1989 // CHECK: [[CMP_I:%.*]] = fcmp olt <2 x float> %a, %b 1990 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1991 // CHECK: ret <2 x i32> [[SEXT_I]] 1992 uint32x2_t test_vclt_f32(float32x2_t a, float32x2_t b) { 1993 return vclt_f32(a, b); 1994 } 1995 1996 // CHECK-LABEL: define <8 x i8> @test_vclt_u8(<8 x i8> %a, <8 x i8> %b) #0 { 1997 // CHECK: [[CMP_I:%.*]] = icmp ult <8 x i8> %a, %b 1998 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 1999 // CHECK: ret <8 x i8> [[SEXT_I]] 2000 uint8x8_t test_vclt_u8(uint8x8_t a, uint8x8_t b) { 2001 return vclt_u8(a, b); 2002 } 2003 2004 // CHECK-LABEL: define <4 x i16> @test_vclt_u16(<4 x i16> %a, <4 x i16> %b) #0 { 2005 // CHECK: [[CMP_I:%.*]] = icmp ult <4 x i16> %a, %b 2006 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 2007 // CHECK: ret <4 x i16> [[SEXT_I]] 2008 uint16x4_t test_vclt_u16(uint16x4_t a, uint16x4_t b) { 2009 return vclt_u16(a, b); 2010 } 2011 2012 // CHECK-LABEL: define <2 x i32> @test_vclt_u32(<2 x i32> %a, <2 x i32> %b) #0 { 2013 // CHECK: [[CMP_I:%.*]] = icmp ult <2 x i32> %a, %b 2014 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 2015 // CHECK: ret <2 x i32> [[SEXT_I]] 2016 uint32x2_t test_vclt_u32(uint32x2_t a, uint32x2_t b) { 2017 return vclt_u32(a, b); 2018 } 2019 2020 // CHECK-LABEL: define <16 x i8> @test_vcltq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 2021 // CHECK: [[CMP_I:%.*]] = icmp slt <16 x i8> %a, %b 2022 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 2023 // CHECK: ret <16 x i8> [[SEXT_I]] 2024 uint8x16_t test_vcltq_s8(int8x16_t a, int8x16_t b) { 2025 return vcltq_s8(a, b); 2026 } 2027 2028 // CHECK-LABEL: define <8 x i16> @test_vcltq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 2029 // CHECK: [[CMP_I:%.*]] = icmp slt <8 x i16> %a, %b 2030 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 2031 // CHECK: ret <8 x i16> [[SEXT_I]] 2032 uint16x8_t test_vcltq_s16(int16x8_t a, int16x8_t b) { 2033 return vcltq_s16(a, b); 2034 } 2035 2036 // CHECK-LABEL: define <4 x i32> @test_vcltq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 2037 // CHECK: [[CMP_I:%.*]] = icmp slt <4 x i32> %a, %b 2038 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 2039 // CHECK: ret <4 x i32> [[SEXT_I]] 2040 uint32x4_t test_vcltq_s32(int32x4_t a, int32x4_t b) { 2041 return vcltq_s32(a, b); 2042 } 2043 2044 // CHECK-LABEL: define <4 x i32> @test_vcltq_f32(<4 x float> %a, <4 x float> %b) #0 { 2045 // CHECK: [[CMP_I:%.*]] = fcmp olt <4 x float> %a, %b 2046 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 2047 // CHECK: ret <4 x i32> [[SEXT_I]] 2048 uint32x4_t test_vcltq_f32(float32x4_t a, float32x4_t b) { 2049 return vcltq_f32(a, b); 2050 } 2051 2052 // CHECK-LABEL: define <16 x i8> @test_vcltq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 2053 // CHECK: [[CMP_I:%.*]] = icmp ult <16 x i8> %a, %b 2054 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 2055 // CHECK: ret <16 x i8> [[SEXT_I]] 2056 uint8x16_t test_vcltq_u8(uint8x16_t a, uint8x16_t b) { 2057 return vcltq_u8(a, b); 2058 } 2059 2060 // CHECK-LABEL: define <8 x i16> @test_vcltq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 2061 // CHECK: [[CMP_I:%.*]] = icmp ult <8 x i16> %a, %b 2062 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 2063 // CHECK: ret <8 x i16> [[SEXT_I]] 2064 uint16x8_t 
test_vcltq_u16(uint16x8_t a, uint16x8_t b) { 2065 return vcltq_u16(a, b); 2066 } 2067 2068 // CHECK-LABEL: define <4 x i32> @test_vcltq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 2069 // CHECK: [[CMP_I:%.*]] = icmp ult <4 x i32> %a, %b 2070 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 2071 // CHECK: ret <4 x i32> [[SEXT_I]] 2072 uint32x4_t test_vcltq_u32(uint32x4_t a, uint32x4_t b) { 2073 return vcltq_u32(a, b); 2074 } 2075 2076 2077 // CHECK-LABEL: define <8 x i8> @test_vclz_s8(<8 x i8> %a) #0 { 2078 // CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #4 2079 // CHECK: ret <8 x i8> [[VCLZ_V_I]] 2080 int8x8_t test_vclz_s8(int8x8_t a) { 2081 return vclz_s8(a); 2082 } 2083 2084 // CHECK-LABEL: define <4 x i16> @test_vclz_s16(<4 x i16> %a) #0 { 2085 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 2086 // CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2087 // CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false) #4 2088 // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8> 2089 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16> 2090 // CHECK: ret <4 x i16> [[TMP1]] 2091 int16x4_t test_vclz_s16(int16x4_t a) { 2092 return vclz_s16(a); 2093 } 2094 2095 // CHECK-LABEL: define <2 x i32> @test_vclz_s32(<2 x i32> %a) #0 { 2096 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 2097 // CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2098 // CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false) #4 2099 // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8> 2100 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32> 2101 // CHECK: ret <2 x i32> [[TMP1]] 2102 int32x2_t test_vclz_s32(int32x2_t a) { 2103 return vclz_s32(a); 2104 } 2105 2106 // CHECK-LABEL: define <8 x i8> @test_vclz_u8(<8 x i8> %a) #0 { 2107 // CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #4 2108 // CHECK: ret <8 x i8> [[VCLZ_V_I]] 2109 uint8x8_t test_vclz_u8(uint8x8_t a) { 2110 return vclz_u8(a); 2111 } 2112 2113 // CHECK-LABEL: define <4 x i16> @test_vclz_u16(<4 x i16> %a) #0 { 2114 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 2115 // CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2116 // CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false) #4 2117 // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8> 2118 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16> 2119 // CHECK: ret <4 x i16> [[TMP1]] 2120 uint16x4_t test_vclz_u16(uint16x4_t a) { 2121 return vclz_u16(a); 2122 } 2123 2124 // CHECK-LABEL: define <2 x i32> @test_vclz_u32(<2 x i32> %a) #0 { 2125 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 2126 // CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2127 // CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false) #4 2128 // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8> 2129 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32> 2130 // CHECK: ret <2 x i32> [[TMP1]] 2131 uint32x2_t test_vclz_u32(uint32x2_t a) { 2132 return vclz_u32(a); 2133 } 2134 2135 // CHECK-LABEL: define <16 x i8> @test_vclzq_s8(<16 x i8> %a) #0 { 2136 // CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #4 2137 // CHECK: ret <16 x i8> [[VCLZQ_V_I]] 
int8x16_t test_vclzq_s8(int8x16_t a) {
  return vclzq_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vclzq_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[VCLZQ_V_I]], i1 false) #4
// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP1]]
int16x8_t test_vclzq_s16(int16x8_t a) {
  return vclzq_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vclzq_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[VCLZQ_V_I]], i1 false) #4
// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP1]]
int32x4_t test_vclzq_s32(int32x4_t a) {
  return vclzq_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vclzq_u8(<16 x i8> %a) #0 {
// CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #4
// CHECK: ret <16 x i8> [[VCLZQ_V_I]]
uint8x16_t test_vclzq_u8(uint8x16_t a) {
  return vclzq_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vclzq_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[VCLZQ_V_I]], i1 false) #4
// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP1]]
uint16x8_t test_vclzq_u16(uint16x8_t a) {
  return vclzq_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vclzq_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[VCLZQ_V_I]], i1 false) #4
// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP1]]
uint32x4_t test_vclzq_u32(uint32x4_t a) {
  return vclzq_u32(a);
}


// CHECK-LABEL: define <8 x i8> @test_vcnt_u8(<8 x i8> %a) #0 {
// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #4
// CHECK: ret <8 x i8> [[VCNT_V_I]]
uint8x8_t test_vcnt_u8(uint8x8_t a) {
  return vcnt_u8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vcnt_s8(<8 x i8> %a) #0 {
// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #4
// CHECK: ret <8 x i8> [[VCNT_V_I]]
int8x8_t test_vcnt_s8(int8x8_t a) {
  return vcnt_s8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vcnt_p8(<8 x i8> %a) #0 {
// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #4
// CHECK: ret <8 x i8> [[VCNT_V_I]]
poly8x8_t test_vcnt_p8(poly8x8_t a) {
  return vcnt_p8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vcntq_u8(<16 x i8> %a) #0 {
// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #4
// CHECK: ret <16 x i8> [[VCNTQ_V_I]]
uint8x16_t test_vcntq_u8(uint8x16_t a) {
  return vcntq_u8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vcntq_s8(<16 x i8> %a) #0 {
// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #4
// CHECK: ret <16 x i8> [[VCNTQ_V_I]]
int8x16_t test_vcntq_s8(int8x16_t a) {
  return vcntq_s8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vcntq_p8(<16 x i8> %a) #0 {
// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #4
// CHECK: ret <16 x i8> [[VCNTQ_V_I]]
poly8x16_t test_vcntq_p8(poly8x16_t a) {
  return vcntq_p8(a);
}


// CHECK-LABEL: define <16 x i8> @test_vcombine_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vcombine_s8(int8x8_t a, int8x8_t b) {
  return vcombine_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vcombine_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vcombine_s16(int16x4_t a, int16x4_t b) {
  return vcombine_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcombine_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vcombine_s32(int32x2_t a, int32x2_t b) {
  return vcombine_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vcombine_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
int64x2_t test_vcombine_s64(int64x1_t a, int64x1_t b) {
  return vcombine_s64(a, b);
}

// CHECK-LABEL: define <8 x half> @test_vcombine_f16(<4 x half> %a, <4 x half> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x half> [[SHUFFLE_I]]
float16x8_t test_vcombine_f16(float16x4_t a, float16x4_t b) {
  return vcombine_f16(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vcombine_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x float> [[SHUFFLE_I]]
float32x4_t test_vcombine_f32(float32x2_t a, float32x2_t b) {
  return vcombine_f32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vcombine_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vcombine_u8(uint8x8_t a, uint8x8_t b) {
  return vcombine_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vcombine_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vcombine_u16(uint16x4_t a, uint16x4_t b) {
  return vcombine_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vcombine_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vcombine_u32(uint32x2_t a, uint32x2_t b) {
  return vcombine_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vcombine_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
uint64x2_t test_vcombine_u64(uint64x1_t a, uint64x1_t b) {
  return vcombine_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vcombine_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vcombine_p8(poly8x8_t a, poly8x8_t b) {
  return vcombine_p8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vcombine_p16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
poly16x8_t test_vcombine_p16(poly16x4_t a, poly16x4_t b) {
  return vcombine_p16(a, b);
}


// CHECK-LABEL: define <8 x i8> @test_vcreate_s8(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8>
// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false) #4
// CHECK: ret <8 x i8> [[VCLZ_V_I]]
int8x8_t test_vcreate_s8(uint64_t a) {
  return vclz_s8(vcreate_s8(a));
}

// CHECK-LABEL: define <4 x i16> @test_vcreate_s16(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false) #4
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vcreate_s16(uint64_t a) {
  return vclz_s16(vcreate_s16(a));
}

// CHECK-LABEL: define <2 x i32> @test_vcreate_s32(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x i32>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false) #4
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vcreate_s32(uint64_t a) {
  return vclz_s32(vcreate_s32(a));
}

// CHECK-LABEL: define <4 x half> @test_vcreate_f16(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vcreate_f16(uint64_t a) {
  return vcreate_f16(a);
}

// CHECK-LABEL: define <2 x float> @test_vcreate_f32(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vcreate_f32(uint64_t a) {
  return vcreate_f32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vcreate_u8(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8>
// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false) #4
// CHECK: ret <8 x i8> [[VCLZ_V_I]]
uint8x8_t test_vcreate_u8(uint64_t a) {
  return vclz_s8(vcreate_u8(a));
}

// CHECK-LABEL: define <4 x i16> @test_vcreate_u16(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false) #4
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vcreate_u16(uint64_t a) {
  return vclz_s16(vcreate_u16(a));
}

// CHECK-LABEL: define <2 x i32> @test_vcreate_u32(i64 %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x i32>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false) #4
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vcreate_u32(uint64_t a) {
  return vclz_s32(vcreate_u32(a));
}


// We have two ways of lowering that. Either with one 'vmov d, r, r' or
// with two 'vmov d[],r'. LLVM does the latter. We may want to be less
// strict about the matching pattern if it starts causing problems.
2404 // CHECK-LABEL: define <1 x i64> @test_vcreate_u64(i64 %a) #0 { 2405 // CHECK: [[TMP0:%.*]] = bitcast i64 %a to <1 x i64> 2406 // CHECK: [[ADD_I:%.*]] = add <1 x i64> [[TMP0]], [[TMP0]] 2407 // CHECK: ret <1 x i64> [[ADD_I]] 2408 uint64x1_t test_vcreate_u64(uint64_t a) { 2409 uint64x1_t tmp = vcreate_u64(a); 2410 return vadd_u64(tmp, tmp); 2411 2412 } 2413 2414 // CHECK-LABEL: define <8 x i8> @test_vcreate_p8(i64 %a) #0 { 2415 // CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8> 2416 // CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> [[TMP0]]) #4 2417 // CHECK: ret <8 x i8> [[VCNT_V_I]] 2418 poly8x8_t test_vcreate_p8(uint64_t a) { 2419 return vcnt_p8(vcreate_p8(a)); 2420 } 2421 2422 // CHECK-LABEL: define <4 x i16> @test_vcreate_p16(i64 %a) #0 { 2423 // CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16> 2424 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> 2425 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> 2426 // CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> 2427 // CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]]) #4 2428 // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16> 2429 // CHECK: ret <4 x i16> [[TMP4]] 2430 poly16x4_t test_vcreate_p16(uint64_t a) { 2431 poly16x4_t tmp = vcreate_p16(a); 2432 return vbsl_p16(tmp, tmp, tmp); 2433 } 2434 2435 // CHECK-LABEL: define <1 x i64> @test_vcreate_s64(i64 %a) #0 { 2436 // CHECK: [[TMP0:%.*]] = bitcast i64 %a to <1 x i64> 2437 // CHECK: [[ADD_I:%.*]] = add <1 x i64> [[TMP0]], [[TMP0]] 2438 // CHECK: ret <1 x i64> [[ADD_I]] 2439 int64x1_t test_vcreate_s64(uint64_t a) { 2440 int64x1_t tmp = vcreate_s64(a); 2441 return vadd_s64(tmp, tmp); 2442 } 2443 2444 2445 // CHECK-LABEL: define <4 x half> @test_vcvt_f16_f32(<4 x float> %a) #0 { 2446 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 2447 // CHECK: [[VCVT_F16_F32_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 2448 // CHECK: [[VCVT_F16_F321_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> [[VCVT_F16_F32_I]]) #4 2449 // CHECK: [[VCVT_F16_F322_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I]] to <8 x i8> 2450 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I]] to <4 x half> 2451 // CHECK: ret <4 x half> [[TMP1]] 2452 float16x4_t test_vcvt_f16_f32(float32x4_t a) { 2453 return vcvt_f16_f32(a); 2454 } 2455 2456 2457 // CHECK-LABEL: define <2 x float> @test_vcvt_f32_s32(<2 x i32> %a) #0 { 2458 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 2459 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2460 // CHECK: [[VCVT_I:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float> 2461 // CHECK: ret <2 x float> [[VCVT_I]] 2462 float32x2_t test_vcvt_f32_s32(int32x2_t a) { 2463 return vcvt_f32_s32(a); 2464 } 2465 2466 // CHECK-LABEL: define <2 x float> @test_vcvt_f32_u32(<2 x i32> %a) #0 { 2467 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 2468 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2469 // CHECK: [[VCVT_I:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x float> 2470 // CHECK: ret <2 x float> [[VCVT_I]] 2471 float32x2_t test_vcvt_f32_u32(uint32x2_t a) { 2472 return vcvt_f32_u32(a); 2473 } 2474 2475 // CHECK-LABEL: define <4 x float> @test_vcvtq_f32_s32(<4 x i32> %a) #0 { 2476 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 2477 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2478 // CHECK: [[VCVT_I:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float> 
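
// Editorial note (added; illustrative only, static inline and unreferenced, so
// no IR is emitted for it): vcreate_* is a pure bit-level reinterpretation of
// a 64-bit scalar, matching the single 'bitcast i64' in the IR above; the
// vclz/vcnt/vadd wrappers in these tests exist only so the result is consumed
// and the create is not folded away. A hedged sketch of the same
// reinterpretation using other intrinsics from arm_neon.h:
static inline int8x8_t vcreate_s8_sketch(uint64_t a) {
  // Build a one-lane u64 vector holding 'a', then reinterpret its bits as
  // eight signed bytes -- the same <8 x i8> bitcast the test checks for.
  return vreinterpret_s8_u64(vdup_n_u64(a));
}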

// CHECK-LABEL: define <4 x half> @test_vcvt_f16_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VCVT_F16_F32_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCVT_F16_F321_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> [[VCVT_F16_F32_I]]) #4
// CHECK: [[VCVT_F16_F322_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I]] to <4 x half>
// CHECK: ret <4 x half> [[TMP1]]
float16x4_t test_vcvt_f16_f32(float32x4_t a) {
  return vcvt_f16_f32(a);
}

// CHECK-LABEL: define <2 x float> @test_vcvt_f32_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VCVT_I:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
// CHECK: ret <2 x float> [[VCVT_I]]
float32x2_t test_vcvt_f32_s32(int32x2_t a) {
  return vcvt_f32_s32(a);
}

// CHECK-LABEL: define <2 x float> @test_vcvt_f32_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VCVT_I:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x float>
// CHECK: ret <2 x float> [[VCVT_I]]
float32x2_t test_vcvt_f32_u32(uint32x2_t a) {
  return vcvt_f32_u32(a);
}

// CHECK-LABEL: define <4 x float> @test_vcvtq_f32_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VCVT_I:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
// CHECK: ret <4 x float> [[VCVT_I]]
float32x4_t test_vcvtq_f32_s32(int32x4_t a) {
  return vcvtq_f32_s32(a);
}

// CHECK-LABEL: define <4 x float> @test_vcvtq_f32_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VCVT_I:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float>
// CHECK: ret <4 x float> [[VCVT_I]]
float32x4_t test_vcvtq_f32_u32(uint32x4_t a) {
  return vcvtq_f32_u32(a);
}

// CHECK-LABEL: define <4 x float> @test_vcvt_f32_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: [[VCVT_F32_F16_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VCVT_F32_F161_I:%.*]] = call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I]]) #4
// CHECK: [[VCVT_F32_F162_I:%.*]] = bitcast <4 x float> [[VCVT_F32_F161_I]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCVT_F32_F162_I]] to <4 x float>
// CHECK: ret <4 x float> [[TMP1]]
float32x4_t test_vcvt_f32_f16(float16x4_t a) {
  return vcvt_f32_f16(a);
}

// CHECK-LABEL: define <2 x float> @test_vcvt_n_f32_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 1)
// CHECK: ret <2 x float> [[VCVT_N1]]
float32x2_t test_vcvt_n_f32_s32(int32x2_t a) {
  return vcvt_n_f32_s32(a, 1);
}

// CHECK-LABEL: define <2 x float> @test_vcvt_n_f32_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 1)
// CHECK: ret <2 x float> [[VCVT_N1]]
float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) {
  return vcvt_n_f32_u32(a, 1);
}

// CHECK-LABEL: define <4 x float> @test_vcvtq_n_f32_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 3)
// CHECK: ret <4 x float> [[VCVT_N1]]
float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) {
  return vcvtq_n_f32_s32(a, 3);
}

// CHECK-LABEL: define <4 x float> @test_vcvtq_n_f32_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 3)
// CHECK: ret <4 x float> [[VCVT_N1]]
float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) {
  return vcvtq_n_f32_u32(a, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vcvt_n_s32_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 1)
// CHECK: ret <2 x i32> [[VCVT_N1]]
int32x2_t test_vcvt_n_s32_f32(float32x2_t a) {
  return vcvt_n_s32_f32(a, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vcvtq_n_s32_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 3)
// CHECK: ret <4 x i32> [[VCVT_N1]]
int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) {
  return vcvtq_n_s32_f32(a, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vcvt_n_u32_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 1)
// CHECK: ret <2 x i32> [[VCVT_N1]]
uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) {
  return vcvt_n_u32_f32(a, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vcvtq_n_u32_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 3)
// CHECK: ret <4 x i32> [[VCVT_N1]]
uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) {
  return vcvtq_n_u32_f32(a, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vcvt_s32_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCVT_I:%.*]] = fptosi <2 x float> [[TMP1]] to <2 x i32>
// CHECK: ret <2 x i32> [[VCVT_I]]
int32x2_t test_vcvt_s32_f32(float32x2_t a) {
  return vcvt_s32_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vcvtq_s32_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCVT_I:%.*]] = fptosi <4 x float> [[TMP1]] to <4 x i32>
// CHECK: ret <4 x i32> [[VCVT_I]]
int32x4_t test_vcvtq_s32_f32(float32x4_t a) {
  return vcvtq_s32_f32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vcvt_u32_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCVT_I:%.*]] = fptoui <2 x float> [[TMP1]] to <2 x i32>
// CHECK: ret <2 x i32> [[VCVT_I]]
uint32x2_t test_vcvt_u32_f32(float32x2_t a) {
  return vcvt_u32_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vcvtq_u32_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCVT_I:%.*]] = fptoui <4 x float> [[TMP1]] to <4 x i32>
// CHECK: ret <4 x i32> [[VCVT_I]]
uint32x4_t test_vcvtq_u32_f32(float32x4_t a) {
  return vcvtq_u32_f32(a);
}
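
// Editorial note (added; illustrative only, static inline and unreferenced, so
// no IR is emitted for it): the _n_ variants above are fixed-point
// conversions: vcvt_n_f32_s32(a, n) converts and then scales by 2^-n in a
// single instruction. A hedged sketch of the n == 1 case in terms of the
// plain conversion (scaling by an exact power of two, so the value matches):
static inline float32x2_t vcvt_n1_sketch(int32x2_t a) {
  // Same value as vcvt_n_f32_s32(a, 1): convert, then scale by 2^-1.
  return vmul_n_f32(vcvt_f32_s32(a), 0.5f);
}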

// CHECK-LABEL: define <8 x i8> @test_vdup_lane_u8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <8 x i8> [[SHUFFLE]]
uint8x8_t test_vdup_lane_u8(uint8x8_t a) {
  return vdup_lane_u8(a, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vdup_lane_u16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <4 x i16> [[SHUFFLE]]
uint16x4_t test_vdup_lane_u16(uint16x4_t a) {
  return vdup_lane_u16(a, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vdup_lane_u32(<2 x i32> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 1>
// CHECK: ret <2 x i32> [[SHUFFLE]]
uint32x2_t test_vdup_lane_u32(uint32x2_t a) {
  return vdup_lane_u32(a, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vdup_lane_s8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <8 x i8> [[SHUFFLE]]
int8x8_t test_vdup_lane_s8(int8x8_t a) {
  return vdup_lane_s8(a, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vdup_lane_s16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <4 x i16> [[SHUFFLE]]
int16x4_t test_vdup_lane_s16(int16x4_t a) {
  return vdup_lane_s16(a, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vdup_lane_s32(<2 x i32> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 1>
// CHECK: ret <2 x i32> [[SHUFFLE]]
int32x2_t test_vdup_lane_s32(int32x2_t a) {
  return vdup_lane_s32(a, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vdup_lane_p8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <8 x i8> [[SHUFFLE]]
poly8x8_t test_vdup_lane_p8(poly8x8_t a) {
  return vdup_lane_p8(a, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vdup_lane_p16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <4 x i16> [[SHUFFLE]]
poly16x4_t test_vdup_lane_p16(poly16x4_t a) {
  return vdup_lane_p16(a, 3);
}

// CHECK-LABEL: define <2 x float> @test_vdup_lane_f32(<2 x float> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %a, <2 x float> %a, <2 x i32> <i32 1, i32 1>
// CHECK: ret <2 x float> [[SHUFFLE]]
float32x2_t test_vdup_lane_f32(float32x2_t a) {
  return vdup_lane_f32(a, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vdupq_lane_u8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <16 x i8> [[SHUFFLE]]
uint8x16_t test_vdupq_lane_u8(uint8x8_t a) {
  return vdupq_lane_u8(a, 7);
}

// CHECK-LABEL: define <8 x i16> @test_vdupq_lane_u16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <8 x i16> [[SHUFFLE]]
uint16x8_t test_vdupq_lane_u16(uint16x4_t a) {
  return vdupq_lane_u16(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vdupq_lane_u32(<2 x i32> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[SHUFFLE]]
uint32x4_t test_vdupq_lane_u32(uint32x2_t a) {
  return vdupq_lane_u32(a, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <16 x i8> [[SHUFFLE]]
int8x16_t test_vdupq_lane_s8(int8x8_t a) {
  return vdupq_lane_s8(a, 7);
}

// CHECK-LABEL: define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <8 x i16> [[SHUFFLE]]
int16x8_t test_vdupq_lane_s16(int16x4_t a) {
  return vdupq_lane_s16(a, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[SHUFFLE]]
int32x4_t test_vdupq_lane_s32(int32x2_t a) {
  return vdupq_lane_s32(a, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vdupq_lane_p8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <16 x i8> [[SHUFFLE]]
poly8x16_t test_vdupq_lane_p8(poly8x8_t a) {
  return vdupq_lane_p8(a, 7);
}

// CHECK-LABEL: define <8 x i16> @test_vdupq_lane_p16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <8 x i16> [[SHUFFLE]]
poly16x8_t test_vdupq_lane_p16(poly16x4_t a) {
  return vdupq_lane_p16(a, 3);
}

// CHECK-LABEL: define <4 x float> @test_vdupq_lane_f32(<2 x float> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %a, <2 x float> %a, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x float> [[SHUFFLE]]
float32x4_t test_vdupq_lane_f32(float32x2_t a) {
  return vdupq_lane_f32(a, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vdup_lane_s64(<1 x i64> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %a, <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[SHUFFLE]]
int64x1_t test_vdup_lane_s64(int64x1_t a) {
  return vdup_lane_s64(a, 0);
}

// CHECK-LABEL: define <1 x i64> @test_vdup_lane_u64(<1 x i64> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %a, <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[SHUFFLE]]
uint64x1_t test_vdup_lane_u64(uint64x1_t a) {
  return vdup_lane_u64(a, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %a, <2 x i32> zeroinitializer
// CHECK: ret <2 x i64> [[SHUFFLE]]
int64x2_t test_vdupq_lane_s64(int64x1_t a) {
  return vdupq_lane_s64(a, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vdupq_lane_u64(<1 x i64> %a) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %a, <2 x i32> zeroinitializer
// CHECK: ret <2 x i64> [[SHUFFLE]]
uint64x2_t test_vdupq_lane_u64(uint64x1_t a) {
  return vdupq_lane_u64(a, 0);
}
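
// Editorial note (added; illustrative only, static inline and unreferenced, so
// no IR is emitted for it): vdup_lane_* broadcasts one lane of a vector to
// every lane, hence the shufflevector with a constant all-equal mask above.
// A hedged equivalent via scalar extract and re-splat:
static inline uint32x2_t vdup_lane_u32_sketch(uint32x2_t a) {
  // Same value as vdup_lane_u32(a, 1): read lane 1, then splat it.
  return vdup_n_u32(vget_lane_u32(a, 1));
}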

// CHECK-LABEL: define <8 x i8> @test_vdup_n_u8(i8 zeroext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: ret <8 x i8> [[VECINIT7_I]]
uint8x8_t test_vdup_n_u8(uint8_t a) {
  return vdup_n_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vdup_n_u16(i16 zeroext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VECINIT3_I]]
uint16x4_t test_vdup_n_u16(uint16_t a) {
  return vdup_n_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vdup_n_u32(i32 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK: ret <2 x i32> [[VECINIT1_I]]
uint32x2_t test_vdup_n_u32(uint32_t a) {
  return vdup_n_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vdup_n_s8(i8 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: ret <8 x i8> [[VECINIT7_I]]
int8x8_t test_vdup_n_s8(int8_t a) {
  return vdup_n_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vdup_n_s16(i16 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VECINIT3_I]]
int16x4_t test_vdup_n_s16(int16_t a) {
  return vdup_n_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vdup_n_s32(i32 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK: ret <2 x i32> [[VECINIT1_I]]
int32x2_t test_vdup_n_s32(int32_t a) {
  return vdup_n_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vdup_n_p8(i8 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: ret <8 x i8> [[VECINIT7_I]]
poly8x8_t test_vdup_n_p8(poly8_t a) {
  return vdup_n_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vdup_n_p16(i16 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VECINIT3_I]]
poly16x4_t test_vdup_n_p16(poly16_t a) {
  return vdup_n_p16(a);
}

// CHECK-LABEL: define <4 x half> @test_vdup_n_f16(half* %a) #0 {
// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK: [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[TMP0]], i32 0
// CHECK: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1
// CHECK: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[TMP0]], i32 2
// CHECK: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[TMP0]], i32 3
// CHECK: ret <4 x half> [[VECINIT3]]
float16x4_t test_vdup_n_f16(float16_t *a) {
  return vdup_n_f16(*a);
}

// CHECK-LABEL: define <2 x float> @test_vdup_n_f32(float %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %a, i32 1
// CHECK: ret <2 x float> [[VECINIT1_I]]
float32x2_t test_vdup_n_f32(float32_t a) {
  return vdup_n_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vdupq_n_u8(i8 zeroext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK: ret <16 x i8> [[VECINIT15_I]]
uint8x16_t test_vdupq_n_u8(uint8_t a) {
  return vdupq_n_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vdupq_n_u16(i16 zeroext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VECINIT7_I]]
uint16x8_t test_vdupq_n_u16(uint16_t a) {
  return vdupq_n_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vdupq_n_u32(i32 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
// CHECK: ret <4 x i32> [[VECINIT3_I]]
uint32x4_t test_vdupq_n_u32(uint32_t a) {
  return vdupq_n_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vdupq_n_s8(i8 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK: ret <16 x i8> [[VECINIT15_I]]
int8x16_t test_vdupq_n_s8(int8_t a) {
  return vdupq_n_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vdupq_n_s16(i16 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VECINIT7_I]]
int16x8_t test_vdupq_n_s16(int16_t a) {
  return vdupq_n_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vdupq_n_s32(i32 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
// CHECK: ret <4 x i32> [[VECINIT3_I]]
int32x4_t test_vdupq_n_s32(int32_t a) {
  return vdupq_n_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vdupq_n_p8(i8 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK: ret <16 x i8> [[VECINIT15_I]]
poly8x16_t test_vdupq_n_p8(poly8_t a) {
  return vdupq_n_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vdupq_n_p16(i16 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VECINIT7_I]]
poly16x8_t test_vdupq_n_p16(poly16_t a) {
  return vdupq_n_p16(a);
}

// CHECK-LABEL: define <8 x half> @test_vdupq_n_f16(half* %a) #0 {
// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK: [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[TMP0]], i32 0
// CHECK: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP0]], i32 1
// CHECK: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[TMP0]], i32 2
// CHECK: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[TMP0]], i32 3
// CHECK: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[TMP0]], i32 4
// CHECK: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[TMP0]], i32 5
// CHECK: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[TMP0]], i32 6
// CHECK: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[TMP0]], i32 7
// CHECK: ret <8 x half> [[VECINIT7]]
float16x8_t test_vdupq_n_f16(float16_t *a) {
  return vdupq_n_f16(*a);
}

// CHECK-LABEL: define <4 x float> @test_vdupq_n_f32(float %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %a, i32 3
// CHECK: ret <4 x float> [[VECINIT3_I]]
float32x4_t test_vdupq_n_f32(float32_t a) {
  return vdupq_n_f32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vdup_n_s64(i64 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
// CHECK: ret <1 x i64> [[ADD_I]]
int64x1_t test_vdup_n_s64(int64_t a) {
  int64x1_t tmp = vdup_n_s64(a);
  return vadd_s64(tmp, tmp);
}

// CHECK-LABEL: define <1 x i64> @test_vdup_n_u64(i64 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
// CHECK: ret <1 x i64> [[ADD_I]]
uint64x1_t test_vdup_n_u64(uint64_t a) {
  int64x1_t tmp = vdup_n_u64(a);
  return vadd_s64(tmp, tmp);
}

// CHECK-LABEL: define <2 x i64> @test_vdupq_n_s64(i64 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VECINIT1_I]], [[VECINIT1_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vdupq_n_s64(int64_t a) {
  int64x2_t tmp = vdupq_n_s64(a);
  return vaddq_s64(tmp, tmp);
}

// CHECK-LABEL: define <2 x i64> @test_vdupq_n_u64(i64 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VECINIT1_I]], [[VECINIT1_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vdupq_n_u64(uint64_t a) {
  int64x2_t tmp = vdupq_n_u64(a);
  return vaddq_u64(tmp, tmp);
}
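
// Editorial note (added; illustrative only, static inline and unreferenced, so
// no IR is emitted for it): vdup_n_* builds the splat lane by lane, which is
// why the IR above is a chain of insertelement instructions rather than a
// shuffle. A hedged sketch of the same chain using vset_lane:
static inline uint32x2_t vdup_n_u32_sketch(uint32_t a) {
  uint32x2_t v = vdup_n_u32(0); // stands in for the 'undef' starting value
  v = vset_lane_u32(a, v, 0);   // insertelement ... i32 0
  v = vset_lane_u32(a, v, 1);   // insertelement ... i32 1
  return v;
}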

// CHECK-LABEL: define <8 x i8> @test_veor_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[XOR_I:%.*]] = xor <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[XOR_I]]
int8x8_t test_veor_s8(int8x8_t a, int8x8_t b) {
  return veor_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_veor_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[XOR_I:%.*]] = xor <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[XOR_I]]
int16x4_t test_veor_s16(int16x4_t a, int16x4_t b) {
  return veor_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_veor_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[XOR_I:%.*]] = xor <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[XOR_I]]
int32x2_t test_veor_s32(int32x2_t a, int32x2_t b) {
  return veor_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_veor_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[XOR_I:%.*]] = xor <1 x i64> %a, %b
// CHECK: ret <1 x i64> [[XOR_I]]
int64x1_t test_veor_s64(int64x1_t a, int64x1_t b) {
  return veor_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_veor_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[XOR_I:%.*]] = xor <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[XOR_I]]
uint8x8_t test_veor_u8(uint8x8_t a, uint8x8_t b) {
  return veor_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_veor_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[XOR_I:%.*]] = xor <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[XOR_I]]
uint16x4_t test_veor_u16(uint16x4_t a, uint16x4_t b) {
  return veor_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_veor_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[XOR_I:%.*]] = xor <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[XOR_I]]
uint32x2_t test_veor_u32(uint32x2_t a, uint32x2_t b) {
  return veor_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_veor_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[XOR_I:%.*]] = xor <1 x i64> %a, %b
// CHECK: ret <1 x i64> [[XOR_I]]
uint64x1_t test_veor_u64(uint64x1_t a, uint64x1_t b) {
  return veor_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_veorq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[XOR_I:%.*]] = xor <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[XOR_I]]
int8x16_t test_veorq_s8(int8x16_t a, int8x16_t b) {
  return veorq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_veorq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[XOR_I:%.*]] = xor <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[XOR_I]]
int16x8_t test_veorq_s16(int16x8_t a, int16x8_t b) {
  return veorq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_veorq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[XOR_I:%.*]] = xor <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[XOR_I]]
int32x4_t test_veorq_s32(int32x4_t a, int32x4_t b) {
  return veorq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_veorq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[XOR_I:%.*]] = xor <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[XOR_I]]
int64x2_t test_veorq_s64(int64x2_t a, int64x2_t b) {
  return veorq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_veorq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[XOR_I:%.*]] = xor <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[XOR_I]]
uint8x16_t test_veorq_u8(uint8x16_t a, uint8x16_t b) {
  return veorq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_veorq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[XOR_I:%.*]] = xor <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[XOR_I]]
uint16x8_t test_veorq_u16(uint16x8_t a, uint16x8_t b) {
  return veorq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_veorq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[XOR_I:%.*]] = xor <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[XOR_I]]
uint32x4_t test_veorq_u32(uint32x4_t a, uint32x4_t b) {
  return veorq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_veorq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[XOR_I:%.*]] = xor <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[XOR_I]]
uint64x2_t test_veorq_u64(uint64x2_t a, uint64x2_t b) {
  return veorq_u64(a, b);
}
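
// Editorial note (added; illustrative only, static inline and unreferenced, so
// no IR is emitted for it): veor_*/veorq_* carry no type-specific semantics,
// so every variant lowers to a plain IR 'xor'. Clang's vector extensions let
// the same operation be written with the ^ operator directly:
static inline uint32x2_t veor_u32_sketch(uint32x2_t a, uint32x2_t b) {
  return a ^ b; // same IR as veor_u32(a, b)
}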

// CHECK-LABEL: define <8 x i8> @test_vext_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK: ret <8 x i8> [[VEXT]]
int8x8_t test_vext_s8(int8x8_t a, int8x8_t b) {
  return vext_s8(a, b, 7);
}

// CHECK-LABEL: define <8 x i8> @test_vext_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK: ret <8 x i8> [[VEXT]]
uint8x8_t test_vext_u8(uint8x8_t a, uint8x8_t b) {
  return vext_u8(a, b, 7);
}

// CHECK-LABEL: define <8 x i8> @test_vext_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK: ret <8 x i8> [[VEXT]]
poly8x8_t test_vext_p8(poly8x8_t a, poly8x8_t b) {
  return vext_p8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vext_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK: ret <4 x i16> [[VEXT]]
int16x4_t test_vext_s16(int16x4_t a, int16x4_t b) {
  return vext_s16(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vext_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK: ret <4 x i16> [[VEXT]]
uint16x4_t test_vext_u16(uint16x4_t a, uint16x4_t b) {
  return vext_u16(a, b, 3);
}

// CHECK-LABEL: define <4 x i16> @test_vext_p16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK: ret <4 x i16> [[VEXT]]
poly16x4_t test_vext_p16(poly16x4_t a, poly16x4_t b) {
  return vext_p16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vext_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VEXT:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK: ret <2 x i32> [[VEXT]]
int32x2_t test_vext_s32(int32x2_t a, int32x2_t b) {
  return vext_s32(a, b, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vext_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VEXT:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK: ret <2 x i32> [[VEXT]]
uint32x2_t test_vext_u32(uint32x2_t a, uint32x2_t b) {
  return vext_u32(a, b, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vext_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[VEXT]]
int64x1_t test_vext_s64(int64x1_t a, int64x1_t b) {
  return vext_s64(a, b, 0);
}

// CHECK-LABEL: define <1 x i64> @test_vext_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[VEXT]]
uint64x1_t test_vext_u64(uint64x1_t a, uint64x1_t b) {
  return vext_u64(a, b, 0);
}

// CHECK-LABEL: define <2 x float> @test_vext_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VEXT:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK: ret <2 x float> [[VEXT]]
float32x2_t test_vext_f32(float32x2_t a, float32x2_t b) {
  return vext_f32(a, b, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vextq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
// CHECK: ret <16 x i8> [[VEXT]]
int8x16_t test_vextq_s8(int8x16_t a, int8x16_t b) {
  return vextq_s8(a, b, 15);
}

// CHECK-LABEL: define <16 x i8> @test_vextq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
// CHECK: ret <16 x i8> [[VEXT]]
uint8x16_t test_vextq_u8(uint8x16_t a, uint8x16_t b) {
  return vextq_u8(a, b, 15);
}

// CHECK-LABEL: define <16 x i8> @test_vextq_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
// CHECK: ret <16 x i8> [[VEXT]]
poly8x16_t test_vextq_p8(poly8x16_t a, poly8x16_t b) {
  return vextq_p8(a, b, 15);
}

// CHECK-LABEL: define <8 x i16> @test_vextq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK: ret <8 x i16> [[VEXT]]
int16x8_t test_vextq_s16(int16x8_t a, int16x8_t b) {
  return vextq_s16(a, b, 7);
}

// CHECK-LABEL: define <8 x i16> @test_vextq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK: ret <8 x i16> [[VEXT]]
uint16x8_t test_vextq_u16(uint16x8_t a, uint16x8_t b) {
  return vextq_u16(a, b, 7);
}

// CHECK-LABEL: define <8 x i16> @test_vextq_p16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK: ret <8 x i16> [[VEXT]]
poly16x8_t test_vextq_p16(poly16x8_t a, poly16x8_t b) {
  return vextq_p16(a, b, 7);
}

// CHECK-LABEL: define <4 x i32> @test_vextq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VEXT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK: ret <4 x i32> [[VEXT]]
int32x4_t test_vextq_s32(int32x4_t a, int32x4_t b) {
  return vextq_s32(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vextq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VEXT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK: ret <4 x i32> [[VEXT]]
uint32x4_t test_vextq_u32(uint32x4_t a, uint32x4_t b) {
  return vextq_u32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vextq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK: ret <2 x i64> [[VEXT]]
int64x2_t test_vextq_s64(int64x2_t a, int64x2_t b) {
  return vextq_s64(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vextq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK: ret <2 x i64> [[VEXT]]
uint64x2_t test_vextq_u64(uint64x2_t a, uint64x2_t b) {
  return vextq_u64(a, b, 1);
}

// CHECK-LABEL: define <4 x float> @test_vextq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VEXT:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK: ret <4 x float> [[VEXT]]
float32x4_t test_vextq_f32(float32x4_t a, float32x4_t b) {
  return vextq_f32(a, b, 3);
}
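
// Editorial note (added; illustrative only, static inline and unreferenced, so
// no IR is emitted for it): vext_s8(a, b, n) returns lanes n..n+7 of the
// 16-lane concatenation a:b, so mask indices past 7 in the shuffles above
// select from %b. The same window expressed directly for n == 7:
static inline int8x8_t vext_s8_sketch(int8x8_t a, int8x8_t b) {
  return __builtin_shufflevector(a, b, 7, 8, 9, 10, 11, 12, 13, 14);
}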

// CHECK-LABEL: define <2 x float> @test_vfma_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #4
// CHECK: ret <2 x float> [[TMP6]]
float32x2_t test_vfma_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vfma_f32(a, b, c);
}

// CHECK-LABEL: define <4 x float> @test_vfmaq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #4
// CHECK: ret <4 x float> [[TMP6]]
float32x4_t test_vfmaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
  return vfmaq_f32(a, b, c);
}

// CHECK-LABEL: define <2 x float> @test_vfms_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 {
// CHECK: [[SUB_I:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #4
// CHECK: ret <2 x float> [[TMP6]]
float32x2_t test_vfms_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vfms_f32(a, b, c);
}

// CHECK-LABEL: define <4 x float> @test_vfmsq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
// CHECK: [[SUB_I:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #4
// CHECK: ret <4 x float> [[TMP6]]
float32x4_t test_vfmsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
  return vfmsq_f32(a, b, c);
}
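
// Editorial note (added; illustrative only, static inline and unreferenced, so
// no IR is emitted for it): vfms_f32(a, b, c) computes a - b*c with a single
// rounding, implemented as fma(-b, c, a); the 'fsub <-0.0, ...>, %b' in the
// IR above is the lane-wise negation of %b. A hedged restatement in terms of
// vfma and vneg:
static inline float32x2_t vfms_sketch(float32x2_t a, float32x2_t b,
                                      float32x2_t c) {
  return vfma_f32(a, vneg_f32(b), c); // same value as vfms_f32(a, b, c)
}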

// CHECK-LABEL: define <8 x i8> @test_vget_high_s8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vget_high_s8(int8x16_t a) {
  return vget_high_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vget_high_s16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vget_high_s16(int16x8_t a) {
  return vget_high_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vget_high_s32(<4 x i32> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
int32x2_t test_vget_high_s32(int32x4_t a) {
  return vget_high_s32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vget_high_s64(<2 x i64> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1>
// CHECK: ret <1 x i64> [[SHUFFLE_I]]
int64x1_t test_vget_high_s64(int64x2_t a) {
  return vget_high_s64(a);
}

// CHECK-LABEL: define <4 x half> @test_vget_high_f16(<8 x half> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <4 x half> [[SHUFFLE_I]]
float16x4_t test_vget_high_f16(float16x8_t a) {
  return vget_high_f16(a);
}

// CHECK-LABEL: define <2 x float> @test_vget_high_f32(<4 x float> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 2, i32 3>
// CHECK: ret <2 x float> [[SHUFFLE_I]]
float32x2_t test_vget_high_f32(float32x4_t a) {
  return vget_high_f32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vget_high_u8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vget_high_u8(uint8x16_t a) {
  return vget_high_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vget_high_u16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vget_high_u16(uint16x8_t a) {
  return vget_high_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vget_high_u32(<4 x i32> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
uint32x2_t test_vget_high_u32(uint32x4_t a) {
  return vget_high_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vget_high_u64(<2 x i64> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1>
// CHECK: ret <1 x i64> [[SHUFFLE_I]]
uint64x1_t test_vget_high_u64(uint64x2_t a) {
  return vget_high_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vget_high_p8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vget_high_p8(poly8x16_t a) {
  return vget_high_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vget_high_p16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vget_high_p16(poly16x8_t a) {
  return vget_high_p16(a);
}
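
// Editorial note (added; illustrative only, static inline and unreferenced, so
// no IR is emitted for it): vget_high_* extracts the upper 64-bit half of a
// 128-bit vector (the second D register of a Q register), hence the shuffles
// above selecting the top half of the lanes:
static inline int8x8_t vget_high_s8_sketch(int8x16_t a) {
  return __builtin_shufflevector(a, a, 8, 9, 10, 11, 12, 13, 14, 15);
}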
// CHECK-LABEL: define zeroext i8 @test_vget_lane_u8(<8 x i8> %a) #0 {
// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK: ret i8 [[VGET_LANE]]
uint8_t test_vget_lane_u8(uint8x8_t a) {
  return vget_lane_u8(a, 7);
}

// CHECK-LABEL: define zeroext i16 @test_vget_lane_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
// CHECK: ret i16 [[VGET_LANE]]
uint16_t test_vget_lane_u16(uint16x4_t a) {
  return vget_lane_u16(a, 3);
}

// CHECK-LABEL: define i32 @test_vget_lane_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
// CHECK: ret i32 [[VGET_LANE]]
uint32_t test_vget_lane_u32(uint32x2_t a) {
  return vget_lane_u32(a, 1);
}

// CHECK-LABEL: define signext i8 @test_vget_lane_s8(<8 x i8> %a) #0 {
// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK: ret i8 [[VGET_LANE]]
int8_t test_vget_lane_s8(int8x8_t a) {
  return vget_lane_s8(a, 7);
}

// CHECK-LABEL: define signext i16 @test_vget_lane_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
// CHECK: ret i16 [[VGET_LANE]]
int16_t test_vget_lane_s16(int16x4_t a) {
  return vget_lane_s16(a, 3);
}

// CHECK-LABEL: define i32 @test_vget_lane_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
// CHECK: ret i32 [[VGET_LANE]]
int32_t test_vget_lane_s32(int32x2_t a) {
  return vget_lane_s32(a, 1);
}

// CHECK-LABEL: define signext i8 @test_vget_lane_p8(<8 x i8> %a) #0 {
// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK: ret i8 [[VGET_LANE]]
poly8_t test_vget_lane_p8(poly8x8_t a) {
  return vget_lane_p8(a, 7);
}

// CHECK-LABEL: define signext i16 @test_vget_lane_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
// CHECK: ret i16 [[VGET_LANE]]
poly16_t test_vget_lane_p16(poly16x4_t a) {
  return vget_lane_p16(a, 3);
}

// CHECK-LABEL: define float @test_vget_lane_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
// CHECK: ret float [[VGET_LANE]]
float32_t test_vget_lane_f32(float32x2_t a) {
  return vget_lane_f32(a, 1);
}

// CHECK-LABEL: define float @test_vget_lane_f16(<4 x half> %a) #0 {
// CHECK: [[__REINT_242:%.*]] = alloca <4 x half>, align 8
// CHECK: [[__REINT1_242:%.*]] = alloca i16, align 2
// CHECK: store <4 x half> %a, <4 x half>* [[__REINT_242]], align 8
// CHECK: [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_242]] to <4 x i16>*
// CHECK: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP3]], i32 1
// CHECK: store i16 [[VGET_LANE]], i16* [[__REINT1_242]], align 2
// CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_242]] to half*
// CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
// CHECK: [[CONV:%.*]] = fpext half [[TMP5]] to float
// CHECK: ret float [[CONV]]
float32_t test_vget_lane_f16(float16x4_t a) {
  return vget_lane_f16(a, 1);
}

// CHECK-LABEL: define zeroext i8 @test_vgetq_lane_u8(<16 x i8> %a) #0 {
// CHECK: [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK: ret i8 [[VGET_LANE]]
uint8_t test_vgetq_lane_u8(uint8x16_t a) {
  return vgetq_lane_u8(a, 15);
}

// CHECK-LABEL: define zeroext i16 @test_vgetq_lane_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK: ret i16 [[VGET_LANE]]
uint16_t test_vgetq_lane_u16(uint16x8_t a) {
  return vgetq_lane_u16(a, 7);
}

// CHECK-LABEL: define i32 @test_vgetq_lane_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
// CHECK: ret i32 [[VGET_LANE]]
uint32_t test_vgetq_lane_u32(uint32x4_t a) {
  return vgetq_lane_u32(a, 3);
}

// CHECK-LABEL: define signext i8 @test_vgetq_lane_s8(<16 x i8> %a) #0 {
// CHECK: [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK: ret i8 [[VGET_LANE]]
int8_t test_vgetq_lane_s8(int8x16_t a) {
  return vgetq_lane_s8(a, 15);
}

// CHECK-LABEL: define signext i16 @test_vgetq_lane_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK: ret i16 [[VGET_LANE]]
int16_t test_vgetq_lane_s16(int16x8_t a) {
  return vgetq_lane_s16(a, 7);
}

// CHECK-LABEL: define i32 @test_vgetq_lane_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
// CHECK: ret i32 [[VGET_LANE]]
int32_t test_vgetq_lane_s32(int32x4_t a) {
  return vgetq_lane_s32(a, 3);
}

// CHECK-LABEL: define signext i8 @test_vgetq_lane_p8(<16 x i8> %a) #0 {
// CHECK: [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK: ret i8 [[VGET_LANE]]
poly8_t test_vgetq_lane_p8(poly8x16_t a) {
  return vgetq_lane_p8(a, 15);
}

// CHECK-LABEL: define signext i16 @test_vgetq_lane_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK: ret i16 [[VGET_LANE]]
poly16_t test_vgetq_lane_p16(poly16x8_t a) {
  return vgetq_lane_p16(a, 7);
}

// CHECK-LABEL: define float @test_vgetq_lane_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
// CHECK: ret float [[VGET_LANE]]
float32_t test_vgetq_lane_f32(float32x4_t a) {
  return vgetq_lane_f32(a, 3);
}

// CHECK-LABEL: define float @test_vgetq_lane_f16(<8 x half> %a) #0 {
// CHECK: [[__REINT_244:%.*]] = alloca <8 x half>, align 16
// CHECK: [[__REINT1_244:%.*]] = alloca i16, align 2
// CHECK: store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16
// CHECK: [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_244]] to <8 x i16>*
// CHECK: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3
// CHECK: store i16 [[VGET_LANE]], i16* [[__REINT1_244]], align 2
// CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_244]] to half*
// CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
// CHECK: [[CONV:%.*]] = fpext half [[TMP5]] to float
// CHECK: ret float [[CONV]]
float32_t test_vgetq_lane_f16(float16x8_t a) {
  return vgetq_lane_f16(a, 3);
}

// The optimizer is able to remove all moves now.
// CHECK-LABEL: define i64 @test_vget_lane_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK: ret i64 [[VGET_LANE]]
int64_t test_vget_lane_s64(int64x1_t a) {
  return vget_lane_s64(a, 0);
}

// The optimizer is able to remove all moves now.
// CHECK-LABEL: define i64 @test_vget_lane_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK: ret i64 [[VGET_LANE]]
uint64_t test_vget_lane_u64(uint64x1_t a) {
  return vget_lane_u64(a, 0);
}

// CHECK-LABEL: define i64 @test_vgetq_lane_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK: ret i64 [[VGET_LANE]]
int64_t test_vgetq_lane_s64(int64x2_t a) {
  return vgetq_lane_s64(a, 1);
}

// CHECK-LABEL: define i64 @test_vgetq_lane_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK: ret i64 [[VGET_LANE]]
uint64_t test_vgetq_lane_u64(uint64x2_t a) {
  return vgetq_lane_u64(a, 1);
}

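// All of the vget(q)_lane tests above reduce to a single extractelement with a
// constant lane index. Illustrative usage sketch (hypothetical helper, not one
// of the FileCheck-verified tests): a horizontal sum via per-lane reads.
static inline uint32_t example_sum_lanes_u32(uint32x2_t v) {
  // The lane index must be a compile-time constant.
  return vget_lane_u32(v, 0) + vget_lane_u32(v, 1);
}
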
// CHECK-LABEL: define <8 x i8> @test_vget_low_s8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vget_low_s8(int8x16_t a) {
  return vget_low_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vget_low_s16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vget_low_s16(int16x8_t a) {
  return vget_low_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vget_low_s32(<4 x i32> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
int32x2_t test_vget_low_s32(int32x4_t a) {
  return vget_low_s32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vget_low_s64(<2 x i64> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[SHUFFLE_I]]
int64x1_t test_vget_low_s64(int64x2_t a) {
  return vget_low_s64(a);
}

// CHECK-LABEL: define <4 x half> @test_vget_low_f16(<8 x half> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x half> [[SHUFFLE_I]]
float16x4_t test_vget_low_f16(float16x8_t a) {
  return vget_low_f16(a);
}

// CHECK-LABEL: define <2 x float> @test_vget_low_f32(<4 x float> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x float> [[SHUFFLE_I]]
float32x2_t test_vget_low_f32(float32x4_t a) {
  return vget_low_f32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vget_low_u8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vget_low_u8(uint8x16_t a) {
  return vget_low_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vget_low_u16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vget_low_u16(uint16x8_t a) {
  return vget_low_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vget_low_u32(<4 x i32> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
uint32x2_t test_vget_low_u32(uint32x4_t a) {
  return vget_low_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vget_low_u64(<2 x i64> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[SHUFFLE_I]]
uint64x1_t test_vget_low_u64(uint64x2_t a) {
  return vget_low_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vget_low_p8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vget_low_p8(poly8x16_t a) {
  return vget_low_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vget_low_p16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vget_low_p16(poly16x8_t a) {
  return vget_low_p16(a);
}

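// vget_low_* mirrors vget_high_* with indices 0..n/2-1 (a zeroinitializer mask
// in the one-element case). Illustrative usage sketch (hypothetical helper,
// not one of the FileCheck-verified tests): rebuilding a Q register from its
// two halves in swapped order.
static inline int16x8_t example_swap_halves(int16x8_t v) {
  // vcombine places its first operand in the low half, so this swaps halves.
  return vcombine_s16(vget_high_s16(v), vget_low_s16(v));
}
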
// CHECK-LABEL: define <8 x i8> @test_vhadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VHADD_V_I]]
int8x8_t test_vhadd_s8(int8x8_t a, int8x8_t b) {
  return vhadd_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vhadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]]) #4
// CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vhadd_s16(int16x4_t a, int16x4_t b) {
  return vhadd_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vhadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]]) #4
// CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vhadd_s32(int32x2_t a, int32x2_t b) {
  return vhadd_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vhadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VHADD_V_I]]
uint8x8_t test_vhadd_u8(uint8x8_t a, uint8x8_t b) {
  return vhadd_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vhadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]]) #4
// CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vhadd_u16(uint16x4_t a, uint16x4_t b) {
  return vhadd_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vhadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]]) #4
// CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vhadd_u32(uint32x2_t a, uint32x2_t b) {
  return vhadd_u32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vhaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VHADDQ_V_I]]
int8x16_t test_vhaddq_s8(int8x16_t a, int8x16_t b) {
  return vhaddq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vhaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]]) #4
// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vhaddq_s16(int16x8_t a, int16x8_t b) {
  return vhaddq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vhaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]]) #4
// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vhaddq_s32(int32x4_t a, int32x4_t b) {
  return vhaddq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vhaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VHADDQ_V_I]]
uint8x16_t test_vhaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vhaddq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vhaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]]) #4
// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vhaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vhaddq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vhaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]]) #4
// CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vhaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vhaddq_u32(a, b);
}

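// vhadd computes the halving add (a + b) >> 1 in a widened intermediate, so it
// cannot overflow. Illustrative usage sketch (hypothetical helper, not one of
// the FileCheck-verified tests): averaging two pixel rows, truncating the
// low bit (vrhadd is the rounding variant).
static inline uint8x8_t example_average_rows(uint8x8_t row0, uint8x8_t row1) {
  return vhadd_u8(row0, row1);
}
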
// CHECK-LABEL: define <8 x i8> @test_vhsub_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VHSUB_V_I]]
int8x8_t test_vhsub_s8(int8x8_t a, int8x8_t b) {
  return vhsub_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vhsub_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> [[VHSUB_V_I]], <4 x i16> [[VHSUB_V1_I]]) #4
// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vhsub_s16(int16x4_t a, int16x4_t b) {
  return vhsub_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vhsub_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> [[VHSUB_V_I]], <2 x i32> [[VHSUB_V1_I]]) #4
// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vhsub_s32(int32x2_t a, int32x2_t b) {
  return vhsub_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vhsub_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VHSUB_V_I]]
uint8x8_t test_vhsub_u8(uint8x8_t a, uint8x8_t b) {
  return vhsub_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vhsub_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> [[VHSUB_V_I]], <4 x i16> [[VHSUB_V1_I]]) #4
// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vhsub_u16(uint16x4_t a, uint16x4_t b) {
  return vhsub_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vhsub_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> [[VHSUB_V_I]], <2 x i32> [[VHSUB_V1_I]]) #4
// CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vhsub_u32(uint32x2_t a, uint32x2_t b) {
  return vhsub_u32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vhsubq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
int8x16_t test_vhsubq_s8(int8x16_t a, int8x16_t b) {
  return vhsubq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vhsubq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> [[VHSUBQ_V_I]], <8 x i16> [[VHSUBQ_V1_I]]) #4
// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vhsubq_s16(int16x8_t a, int16x8_t b) {
  return vhsubq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vhsubq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> [[VHSUBQ_V_I]], <4 x i32> [[VHSUBQ_V1_I]]) #4
// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vhsubq_s32(int32x4_t a, int32x4_t b) {
  return vhsubq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vhsubq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
uint8x16_t test_vhsubq_u8(uint8x16_t a, uint8x16_t b) {
  return vhsubq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vhsubq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> [[VHSUBQ_V_I]], <8 x i16> [[VHSUBQ_V1_I]]) #4
// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vhsubq_u16(uint16x8_t a, uint16x8_t b) {
  return vhsubq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vhsubq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> [[VHSUBQ_V_I]], <4 x i32> [[VHSUBQ_V1_I]]) #4
// CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vhsubq_u32(uint32x4_t a, uint32x4_t b) {
  return vhsubq_u32(a, b);
}

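// vhsub is the subtracting counterpart: (a - b) >> 1 with an arithmetic shift,
// again computed without intermediate overflow. Illustrative usage sketch
// (hypothetical helper, not one of the FileCheck-verified tests):
static inline int16x4_t example_half_difference(int16x4_t a, int16x4_t b) {
  // Halved signed difference of each lane pair.
  return vhsub_s16(a, b);
}
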
// CHECK-LABEL: define <16 x i8> @test_vld1q_u8(i8* %a) #0 {
// CHECK: [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %a, i32 1)
// CHECK: ret <16 x i8> [[VLD1]]
uint8x16_t test_vld1q_u8(uint8_t const * a) {
  return vld1q_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_u16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2)
// CHECK: ret <8 x i16> [[VLD1]]
uint16x8_t test_vld1q_u16(uint16_t const * a) {
  return vld1q_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vld1q_u32(i32* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD1:%.*]] = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0i8(i8* [[TMP0]], i32 4)
// CHECK: ret <4 x i32> [[VLD1]]
uint32x4_t test_vld1q_u32(uint32_t const * a) {
  return vld1q_u32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vld1q_u64(i64* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* [[TMP0]], i32 4)
// CHECK: ret <2 x i64> [[VLD1]]
uint64x2_t test_vld1q_u64(uint64_t const * a) {
  return vld1q_u64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vld1q_s8(i8* %a) #0 {
// CHECK: [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %a, i32 1)
// CHECK: ret <16 x i8> [[VLD1]]
int8x16_t test_vld1q_s8(int8_t const * a) {
  return vld1q_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_s16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2)
// CHECK: ret <8 x i16> [[VLD1]]
int16x8_t test_vld1q_s16(int16_t const * a) {
  return vld1q_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vld1q_s32(i32* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD1:%.*]] = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0i8(i8* [[TMP0]], i32 4)
// CHECK: ret <4 x i32> [[VLD1]]
int32x4_t test_vld1q_s32(int32_t const * a) {
  return vld1q_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vld1q_s64(i64* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* [[TMP0]], i32 4)
// CHECK: ret <2 x i64> [[VLD1]]
int64x2_t test_vld1q_s64(int64_t const * a) {
  return vld1q_s64(a);
}

// CHECK-LABEL: define <8 x half> @test_vld1q_f16(half* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2)
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VLD1]] to <8 x half>
// CHECK: ret <8 x half> [[TMP1]]
float16x8_t test_vld1q_f16(float16_t const * a) {
  return vld1q_f16(a);
}

// CHECK-LABEL: define <4 x float> @test_vld1q_f32(float* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[VLD1:%.*]] = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* [[TMP0]], i32 4)
// CHECK: ret <4 x float> [[VLD1]]
float32x4_t test_vld1q_f32(float32_t const * a) {
  return vld1q_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vld1q_p8(i8* %a) #0 {
// CHECK: [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %a, i32 1)
// CHECK: ret <16 x i8> [[VLD1]]
poly8x16_t test_vld1q_p8(poly8_t const * a) {
  return vld1q_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_p16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2)
// CHECK: ret <8 x i16> [[VLD1]]
poly16x8_t test_vld1q_p16(poly16_t const * a) {
  return vld1q_p16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_u8(i8* %a) #0 {
// CHECK: [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %a, i32 1)
// CHECK: ret <8 x i8> [[VLD1]]
uint8x8_t test_vld1_u8(uint8_t const * a) {
  return vld1_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_u16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2)
// CHECK: ret <4 x i16> [[VLD1]]
uint16x4_t test_vld1_u16(uint16_t const * a) {
  return vld1_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vld1_u32(i32* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD1:%.*]] = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0i8(i8* [[TMP0]], i32 4)
// CHECK: ret <2 x i32> [[VLD1]]
uint32x2_t test_vld1_u32(uint32_t const * a) {
  return vld1_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vld1_u64(i64* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD1:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4)
// CHECK: ret <1 x i64> [[VLD1]]
uint64x1_t test_vld1_u64(uint64_t const * a) {
  return vld1_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_s8(i8* %a) #0 {
// CHECK: [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %a, i32 1)
// CHECK: ret <8 x i8> [[VLD1]]
int8x8_t test_vld1_s8(int8_t const * a) {
  return vld1_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_s16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2)
// CHECK: ret <4 x i16> [[VLD1]]
int16x4_t test_vld1_s16(int16_t const * a) {
  return vld1_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vld1_s32(i32* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD1:%.*]] = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0i8(i8* [[TMP0]], i32 4)
// CHECK: ret <2 x i32> [[VLD1]]
int32x2_t test_vld1_s32(int32_t const * a) {
  return vld1_s32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vld1_s64(i64* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD1:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4)
// CHECK: ret <1 x i64> [[VLD1]]
int64x1_t test_vld1_s64(int64_t const * a) {
  return vld1_s64(a);
}

// CHECK-LABEL: define <4 x half> @test_vld1_f16(half* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2)
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VLD1]] to <4 x half>
// CHECK: ret <4 x half> [[TMP1]]
float16x4_t test_vld1_f16(float16_t const * a) {
  return vld1_f16(a);
}

// CHECK-LABEL: define <2 x float> @test_vld1_f32(float* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[VLD1:%.*]] = call <2 x float> @llvm.arm.neon.vld1.v2f32.p0i8(i8* [[TMP0]], i32 4)
// CHECK: ret <2 x float> [[VLD1]]
float32x2_t test_vld1_f32(float32_t const * a) {
  return vld1_f32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_p8(i8* %a) #0 {
// CHECK: [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %a, i32 1)
// CHECK: ret <8 x i8> [[VLD1]]
poly8x8_t test_vld1_p8(poly8_t const * a) {
  return vld1_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_p16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2)
// CHECK: ret <4 x i16> [[VLD1]]
poly16x4_t test_vld1_p16(poly16_t const * a) {
  return vld1_p16(a);
}

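// vld1(q) lowers to llvm.arm.neon.vld1 with the element alignment as the final
// argument; note the i64 cases pass alignment 4, matching this APCS-derived
// ABI. Illustrative usage sketch (hypothetical helper, not one of the
// FileCheck-verified tests): four partial sums over a buffer whose length `n`
// is assumed to be a multiple of 4.
static inline float32x4_t example_partial_sums_f32(const float32_t *p, int n) {
  float32x4_t acc = vdupq_n_f32(0.0f);
  for (int i = 0; i < n; i += 4)
    acc = vaddq_f32(acc, vld1q_f32(p + i)); // one 4-lane load per iteration
  return acc;
}
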
// CHECK-LABEL: define <16 x i8> @test_vld1q_dup_u8(i8* %a) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
// CHECK: ret <16 x i8> [[LANE]]
uint8x16_t test_vld1q_dup_u8(uint8_t const * a) {
  return vld1q_dup_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_dup_u16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: ret <8 x i16> [[LANE]]
uint16x8_t test_vld1q_dup_u16(uint16_t const * a) {
  return vld1q_dup_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vld1q_dup_u32(i32* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer
// CHECK: ret <4 x i32> [[LANE]]
uint32x4_t test_vld1q_dup_u32(uint32_t const * a) {
  return vld1q_dup_u32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vld1q_dup_u64(i64* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer
// CHECK: ret <2 x i64> [[LANE]]
uint64x2_t test_vld1q_dup_u64(uint64_t const * a) {
  return vld1q_dup_u64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vld1q_dup_s8(i8* %a) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
// CHECK: ret <16 x i8> [[LANE]]
int8x16_t test_vld1q_dup_s8(int8_t const * a) {
  return vld1q_dup_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_dup_s16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: ret <8 x i16> [[LANE]]
int16x8_t test_vld1q_dup_s16(int16_t const * a) {
  return vld1q_dup_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vld1q_dup_s32(i32* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer
// CHECK: ret <4 x i32> [[LANE]]
int32x4_t test_vld1q_dup_s32(int32_t const * a) {
  return vld1q_dup_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vld1q_dup_s64(i64* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer
// CHECK: ret <2 x i64> [[LANE]]
int64x2_t test_vld1q_dup_s64(int64_t const * a) {
  return vld1q_dup_s64(a);
}

// CHECK-LABEL: define <8 x half> @test_vld1q_dup_f16(half* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[LANE]] to <8 x half>
// CHECK: ret <8 x half> [[TMP4]]
float16x8_t test_vld1q_dup_f16(float16_t const * a) {
  return vld1q_dup_f16(a);
}

// CHECK-LABEL: define <4 x float> @test_vld1q_dup_f32(float* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK: [[TMP2:%.*]] = load float, float* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP3]], <4 x i32> zeroinitializer
// CHECK: ret <4 x float> [[LANE]]
float32x4_t test_vld1q_dup_f32(float32_t const * a) {
  return vld1q_dup_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vld1q_dup_p8(i8* %a) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
// CHECK: ret <16 x i8> [[LANE]]
poly8x16_t test_vld1q_dup_p8(poly8_t const * a) {
  return vld1q_dup_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_dup_p16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: ret <8 x i16> [[LANE]]
poly16x8_t test_vld1q_dup_p16(poly16_t const * a) {
  return vld1q_dup_p16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_dup_u8(i8* %a) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: ret <8 x i8> [[LANE]]
uint8x8_t test_vld1_dup_u8(uint8_t const * a) {
  return vld1_dup_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_dup_u16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
// CHECK: ret <4 x i16> [[LANE]]
uint16x4_t test_vld1_dup_u16(uint16_t const * a) {
  return vld1_dup_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vld1_dup_u32(i32* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP3]], <2 x i32> zeroinitializer
// CHECK: ret <2 x i32> [[LANE]]
uint32x2_t test_vld1_dup_u32(uint32_t const * a) {
  return vld1_dup_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vld1_dup_u64(i64* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[LANE]]
uint64x1_t test_vld1_dup_u64(uint64_t const * a) {
  return vld1_dup_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_dup_s8(i8* %a) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: ret <8 x i8> [[LANE]]
int8x8_t test_vld1_dup_s8(int8_t const * a) {
  return vld1_dup_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_dup_s16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
// CHECK: ret <4 x i16> [[LANE]]
int16x4_t test_vld1_dup_s16(int16_t const * a) {
  return vld1_dup_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vld1_dup_s32(i32* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP3]], <2 x i32> zeroinitializer
// CHECK: ret <2 x i32> [[LANE]]
int32x2_t test_vld1_dup_s32(int32_t const * a) {
  return vld1_dup_s32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vld1_dup_s64(i64* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[LANE]]
int64x1_t test_vld1_dup_s64(int64_t const * a) {
  return vld1_dup_s64(a);
}

// CHECK-LABEL: define <4 x half> @test_vld1_dup_f16(half* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <4 x half>
// CHECK: ret <4 x half> [[TMP4]]
float16x4_t test_vld1_dup_f16(float16_t const * a) {
  return vld1_dup_f16(a);
}

// CHECK-LABEL: define <2 x float> @test_vld1_dup_f32(float* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK: [[TMP2:%.*]] = load float, float* [[TMP1]], align 4
// CHECK: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> zeroinitializer
// CHECK: ret <2 x float> [[LANE]]
float32x2_t test_vld1_dup_f32(float32_t const * a) {
  return vld1_dup_f32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_dup_p8(i8* %a) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: ret <8 x i8> [[LANE]]
poly8x8_t test_vld1_dup_p8(poly8_t const * a) {
  return vld1_dup_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_dup_p16(i16* %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
// CHECK: ret <4 x i16> [[LANE]]
poly16x4_t test_vld1_dup_p16(poly16_t const * a) {
  return vld1_dup_p16(a);
}

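// vld1(q)_dup loads one element and splats it via insertelement plus a
// zeroinitializer shuffle. Illustrative usage sketch (hypothetical helper,
// not one of the FileCheck-verified tests): scaling a vector by a scalar
// loaded from memory.
static inline float32x2_t example_scale_by(const float32_t *scale,
                                           float32x2_t v) {
  // Broadcast *scale into both lanes, then multiply lane-wise.
  return vmul_f32(v, vld1_dup_f32(scale));
}
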
// CHECK-LABEL: define <16 x i8> @test_vld1q_lane_u8(i8* %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
// CHECK: ret <16 x i8> [[VLD1_LANE]]
uint8x16_t test_vld1q_lane_u8(uint8_t const * a, uint8x16_t b) {
  return vld1q_lane_u8(a, b, 15);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_lane_u16(i16* %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
// CHECK: ret <8 x i16> [[VLD1_LANE]]
uint16x8_t test_vld1q_lane_u16(uint16_t const * a, uint16x8_t b) {
  return vld1q_lane_u16(a, b, 7);
}

// CHECK-LABEL: define <4 x i32> @test_vld1q_lane_u32(i32* %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i32 3
// CHECK: ret <4 x i32> [[VLD1_LANE]]
uint32x4_t test_vld1q_lane_u32(uint32_t const * a, uint32x4_t b) {
  return vld1q_lane_u32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vld1q_lane_u64(i64* %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4)
// CHECK: [[VLD1Q_LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i64> [[VLD1Q_LANE]]
uint64x2_t test_vld1q_lane_u64(uint64_t const * a, uint64x2_t b) {
  return vld1q_lane_u64(a, b, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vld1q_lane_s8(i8* %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
// CHECK: ret <16 x i8> [[VLD1_LANE]]
int8x16_t test_vld1q_lane_s8(int8_t const * a, int8x16_t b) {
  return vld1q_lane_s8(a, b, 15);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_lane_s16(i16* %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
// CHECK: ret <8 x i16> [[VLD1_LANE]]
int16x8_t test_vld1q_lane_s16(int16_t const * a, int16x8_t b) {
  return vld1q_lane_s16(a, b, 7);
}

// CHECK-LABEL: define <4 x i32> @test_vld1q_lane_s32(i32* %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i32 3
// CHECK: ret <4 x i32> [[VLD1_LANE]]
int32x4_t test_vld1q_lane_s32(int32_t const * a, int32x4_t b) {
  return vld1q_lane_s32(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vld1q_lane_s64(i64* %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4)
// CHECK: [[VLD1Q_LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i64> [[VLD1Q_LANE]]
int64x2_t test_vld1q_lane_s64(int64_t const * a, int64x2_t b) {
  return vld1q_lane_s64(a, b, 1);
}

// CHECK-LABEL: define <8 x half> @test_vld1q_lane_f16(half* %a, <8 x half> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[VLD1_LANE]] to <8 x half>
// CHECK: ret <8 x half> [[TMP5]]
float16x8_t test_vld1q_lane_f16(float16_t const * a, float16x8_t b) {
  return vld1q_lane_f16(a, b, 7);
}

// CHECK-LABEL: define <4 x float> @test_vld1q_lane_f32(float* %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK: [[TMP4:%.*]] = load float, float* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP4]], i32 3
// CHECK: ret <4 x float> [[VLD1_LANE]]
float32x4_t test_vld1q_lane_f32(float32_t const * a, float32x4_t b) {
  return vld1q_lane_f32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vld1q_lane_p8(i8* %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
// CHECK: ret <16 x i8> [[VLD1_LANE]]
poly8x16_t test_vld1q_lane_p8(poly8_t const * a, poly8x16_t b) {
  return vld1q_lane_p8(a, b, 15);
}

// CHECK-LABEL: define <8 x i16> @test_vld1q_lane_p16(i16* %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
// CHECK: ret <8 x i16> [[VLD1_LANE]]
poly16x8_t test_vld1q_lane_p16(poly16_t const * a, poly16x8_t b) {
  return vld1q_lane_p16(a, b, 7);
}

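// vld1(q)_lane loads a single element into one lane and leaves the other lanes
// intact; note the 64-bit lanes above go through vld1 plus shufflevector
// rather than a plain insertelement. Illustrative usage sketch (hypothetical
// helper, not one of the FileCheck-verified tests): refreshing only lane 0
// of a vector from memory.
static inline float32x4_t example_reload_lane0(const float32_t *p,
                                               float32x4_t v) {
  return vld1q_lane_f32(p, v, 0);
}
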
// CHECK-LABEL: define <8 x i8> @test_vld1_lane_u8(i8* %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
// CHECK: ret <8 x i8> [[VLD1_LANE]]
uint8x8_t test_vld1_lane_u8(uint8_t const * a, uint8x8_t b) {
  return vld1_lane_u8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_lane_u16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
// CHECK: ret <4 x i16> [[VLD1_LANE]]
uint16x4_t test_vld1_lane_u16(uint16_t const * a, uint16x4_t b) {
  return vld1_lane_u16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vld1_lane_u32(i32* %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP4]], i32 1
// CHECK: ret <2 x i32> [[VLD1_LANE]]
uint32x2_t test_vld1_lane_u32(uint32_t const * a, uint32x2_t b) {
  return vld1_lane_u32(a, b, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vld1_lane_u64(i64* %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0
// CHECK: ret <1 x i64> [[VLD1_LANE]]
uint64x1_t test_vld1_lane_u64(uint64_t const * a, uint64x1_t b) {
  return vld1_lane_u64(a, b, 0);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_lane_s8(i8* %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
// CHECK: ret <8 x i8> [[VLD1_LANE]]
int8x8_t test_vld1_lane_s8(int8_t const * a, int8x8_t b) {
  return vld1_lane_s8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_lane_s16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
// CHECK: ret <4 x i16> [[VLD1_LANE]]
int16x4_t test_vld1_lane_s16(int16_t const * a, int16x4_t b) {
  return vld1_lane_s16(a, b, 3);
}


// CHECK-LABEL: define <8 x i8> @test_vld1_lane_u8(i8* %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
// CHECK: ret <8 x i8> [[VLD1_LANE]]
uint8x8_t test_vld1_lane_u8(uint8_t const * a, uint8x8_t b) {
  return vld1_lane_u8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_lane_u16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
// CHECK: ret <4 x i16> [[VLD1_LANE]]
uint16x4_t test_vld1_lane_u16(uint16_t const * a, uint16x4_t b) {
  return vld1_lane_u16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vld1_lane_u32(i32* %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP4]], i32 1
// CHECK: ret <2 x i32> [[VLD1_LANE]]
uint32x2_t test_vld1_lane_u32(uint32_t const * a, uint32x2_t b) {
  return vld1_lane_u32(a, b, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vld1_lane_u64(i64* %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0
// CHECK: ret <1 x i64> [[VLD1_LANE]]
uint64x1_t test_vld1_lane_u64(uint64_t const * a, uint64x1_t b) {
  return vld1_lane_u64(a, b, 0);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_lane_s8(i8* %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
// CHECK: ret <8 x i8> [[VLD1_LANE]]
int8x8_t test_vld1_lane_s8(int8_t const * a, int8x8_t b) {
  return vld1_lane_s8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_lane_s16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
// CHECK: ret <4 x i16> [[VLD1_LANE]]
int16x4_t test_vld1_lane_s16(int16_t const * a, int16x4_t b) {
  return vld1_lane_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vld1_lane_s32(i32* %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP4]], i32 1
// CHECK: ret <2 x i32> [[VLD1_LANE]]
int32x2_t test_vld1_lane_s32(int32_t const * a, int32x2_t b) {
  return vld1_lane_s32(a, b, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vld1_lane_s64(i64* %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0
// CHECK: ret <1 x i64> [[VLD1_LANE]]
int64x1_t test_vld1_lane_s64(int64_t const * a, int64x1_t b) {
  return vld1_lane_s64(a, b, 0);
}

// CHECK-LABEL: define <4 x half> @test_vld1_lane_f16(half* %a, <4 x half> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[VLD1_LANE]] to <4 x half>
// CHECK: ret <4 x half> [[TMP5]]
float16x4_t test_vld1_lane_f16(float16_t const * a, float16x4_t b) {
  return vld1_lane_f16(a, b, 3);
}

// CHECK-LABEL: define <2 x float> @test_vld1_lane_f32(float* %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK: [[TMP4:%.*]] = load float, float* [[TMP3]], align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP4]], i32 1
// CHECK: ret <2 x float> [[VLD1_LANE]]
float32x2_t test_vld1_lane_f32(float32_t const * a, float32x2_t b) {
  return vld1_lane_f32(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vld1_lane_p8(i8* %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
// CHECK: ret <8 x i8> [[VLD1_LANE]]
poly8x8_t test_vld1_lane_p8(poly8_t const * a, poly8x8_t b) {
  return vld1_lane_p8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vld1_lane_p16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
// CHECK: ret <4 x i16> [[VLD1_LANE]]
poly16x4_t test_vld1_lane_p16(poly16_t const * a, poly16x4_t b) {
  return vld1_lane_p16(a, b, 3);
}
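
// The vld2q tests below return a NEON struct type (e.g. uint8x16x2_t), which
// is passed back indirectly: the IR takes an sret pointer, stores the
// { <16 x i8>, <16 x i8> } result of @llvm.arm.neon.vld2 into a local, and
// memcpy's it to %agg.result. A hypothetical caller -- shown only as a
// hedged usage sketch, not checked output -- would look like:
//
//   uint8_t buf[32] = {0};
//   uint8x16x2_t pair = vld2q_u8(buf);  // de-interleaved: pair.val[0] gets
//                                       // elements 0,2,4,..., pair.val[1]
//                                       // gets elements 1,3,5,...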

// CHECK-LABEL: define void @test_vld2q_u8(%struct.uint8x16x2_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2Q_V]], { <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 32, i32 16, i1 false)
// CHECK: ret void
uint8x16x2_t test_vld2q_u8(uint8_t const * a) {
  return vld2q_u8(a);
}

// CHECK-LABEL: define void @test_vld2q_u16(%struct.uint16x8x2_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_V]], { <8 x i16>, <8 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
// CHECK: ret void
uint16x8x2_t test_vld2q_u16(uint16_t const * a) {
  return vld2q_u16(a);
}

// CHECK-LABEL: define void @test_vld2q_u32(%struct.uint32x4x2_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2Q_V]], { <4 x i32>, <4 x i32> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
// CHECK: ret void
uint32x4x2_t test_vld2q_u32(uint32_t const * a) {
  return vld2q_u32(a);
}

// CHECK-LABEL: define void @test_vld2q_s8(%struct.int8x16x2_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2Q_V]], { <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 32, i32 16, i1 false)
// CHECK: ret void
int8x16x2_t test_vld2q_s8(int8_t const * a) {
  return vld2q_s8(a);
}

// CHECK-LABEL: define void @test_vld2q_s16(%struct.int16x8x2_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_V]], { <8 x i16>, <8 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
// CHECK: ret void
int16x8x2_t test_vld2q_s16(int16_t const * a) {
  return vld2q_s16(a);
}

// CHECK-LABEL: define void @test_vld2q_s32(%struct.int32x4x2_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2Q_V]], { <4 x i32>, <4 x i32> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
// CHECK: ret void
int32x4x2_t test_vld2q_s32(int32_t const * a) {
  return vld2q_s32(a);
}

// CHECK-LABEL: define void @test_vld2q_f16(%struct.float16x8x2_t* noalias sret %agg.result, half* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_V]], { <8 x i16>, <8 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
// CHECK: ret void
float16x8x2_t test_vld2q_f16(float16_t const * a) {
  return vld2q_f16(a);
}

// CHECK-LABEL: define void @test_vld2q_f32(%struct.float32x4x2_t* noalias sret %agg.result, float* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float> }*
// CHECK: store { <4 x float>, <4 x float> } [[VLD2Q_V]], { <4 x float>, <4 x float> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
// CHECK: ret void
float32x4x2_t test_vld2q_f32(float32_t const * a) {
  return vld2q_f32(a);
}

// CHECK-LABEL: define void @test_vld2q_p8(%struct.poly8x16x2_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2Q_V]], { <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 32, i32 16, i1 false)
// CHECK: ret void
poly8x16x2_t test_vld2q_p8(poly8_t const * a) {
  return vld2q_p8(a);
}

// CHECK-LABEL: define void @test_vld2q_p16(%struct.poly16x8x2_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_V]], { <8 x i16>, <8 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false)
// CHECK: ret void
poly16x8x2_t test_vld2q_p16(poly16_t const * a) {
  return vld2q_p16(a);
}
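
// The non-q vld2 tests below follow the same pattern with 64-bit vectors:
// the struct is 16 bytes, so the memcpy to %agg.result becomes i32 16 with
// 8-byte alignment. Hedged usage sketch (hypothetical, not checked output):
//
//   int16_t buf[8] = {0};
//   int16x4x2_t pair = vld2_s16(buf);  // de-interleaves 8 shorts into
//                                      // pair.val[0] and pair.val[1]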

// CHECK-LABEL: define void @test_vld2_u8(%struct.uint8x8x2_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_V]], { <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 16, i32 8, i1 false)
// CHECK: ret void
uint8x8x2_t test_vld2_u8(uint8_t const * a) {
  return vld2_u8(a);
}

// CHECK-LABEL: define void @test_vld2_u16(%struct.uint16x4x2_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_V]], { <4 x i16>, <4 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
// CHECK: ret void
uint16x4x2_t test_vld2_u16(uint16_t const * a) {
  return vld2_u16(a);
}

// CHECK-LABEL: define void @test_vld2_u32(%struct.uint32x2x2_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD2_V:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_V]], { <2 x i32>, <2 x i32> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
// CHECK: ret void
uint32x2x2_t test_vld2_u32(uint32_t const * a) {
  return vld2_u32(a);
}

// CHECK-LABEL: define void @test_vld2_u64(%struct.uint64x1x2_t* noalias sret %agg.result, i64* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD2_V:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_V]], { <1 x i64>, <1 x i64> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint64x1x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
// CHECK: ret void
uint64x1x2_t test_vld2_u64(uint64_t const * a) {
  return vld2_u64(a);
}

// CHECK-LABEL: define void @test_vld2_s8(%struct.int8x8x2_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_V]], { <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 16, i32 8, i1 false)
// CHECK: ret void
int8x8x2_t test_vld2_s8(int8_t const * a) {
  return vld2_s8(a);
}

// CHECK-LABEL: define void @test_vld2_s16(%struct.int16x4x2_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_V]], { <4 x i16>, <4 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
// CHECK: ret void
int16x4x2_t test_vld2_s16(int16_t const * a) {
  return vld2_s16(a);
}

// CHECK-LABEL: define void @test_vld2_s32(%struct.int32x2x2_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD2_V:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_V]], { <2 x i32>, <2 x i32> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
// CHECK: ret void
int32x2x2_t test_vld2_s32(int32_t const * a) {
  return vld2_s32(a);
}

// CHECK-LABEL: define void @test_vld2_s64(%struct.int64x1x2_t* noalias sret %agg.result, i64* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD2_V:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_V]], { <1 x i64>, <1 x i64> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int64x1x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
// CHECK: ret void
int64x1x2_t test_vld2_s64(int64_t const * a) {
  return vld2_s64(a);
}

// CHECK-LABEL: define void @test_vld2_f16(%struct.float16x4x2_t* noalias sret %agg.result, half* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_V]], { <4 x i16>, <4 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
// CHECK: ret void
float16x4x2_t test_vld2_f16(float16_t const * a) {
  return vld2_f16(a);
}

// CHECK-LABEL: define void @test_vld2_f32(%struct.float32x2x2_t* noalias sret %agg.result, float* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[VLD2_V:%.*]] = call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float> } [[VLD2_V]], { <2 x float>, <2 x float> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
// CHECK: ret void
float32x2x2_t test_vld2_f32(float32_t const * a) {
  return vld2_f32(a);
}

// CHECK-LABEL: define void @test_vld2_p8(%struct.poly8x8x2_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_V]], { <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 16, i32 8, i1 false)
// CHECK: ret void
poly8x8x2_t test_vld2_p8(poly8_t const * a) {
  return vld2_p8(a);
}

// CHECK-LABEL: define void @test_vld2_p16(%struct.poly16x4x2_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_V]], { <4 x i16>, <4 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
// CHECK: ret void
poly16x4x2_t test_vld2_p16(poly16_t const * a) {
  return vld2_p16(a);
}
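
// vld2_dup loads one adjacent element pair and broadcasts it: the IR calls
// @llvm.arm.neon.vld2lane with undef vectors and lane 0, then shufflevectors
// each result with a zero mask so every lane holds the loaded value (the
// 64x1 variants degenerate to a plain vld2). Hedged usage sketch
// (hypothetical, not checked output):
//
//   uint16_t pair[2] = {1, 2};
//   uint16x4x2_t d = vld2_dup_u16(pair);  // d.val[0] = {1,1,1,1},
//                                         // d.val[1] = {2,2,2,2}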

// CHECK-LABEL: define void @test_vld2_dup_u8(%struct.uint8x8x2_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0
// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[TMP2]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1
// CHECK: [[TMP5:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8> } [[TMP4]], { <8 x i8>, <8 x i8> }* [[TMP5]]
// CHECK: [[TMP6:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8*
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP6]], i8* [[TMP7]], i32 16, i32 8, i1 false)
// CHECK: ret void
uint8x8x2_t test_vld2_dup_u8(uint8_t const * a) {
  return vld2_dup_u8(a);
}

// CHECK-LABEL: define void @test_vld2_dup_u16(%struct.uint16x4x2_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[TMP5]], { <4 x i16>, <4 x i16> }* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false)
// CHECK: ret void
uint16x4x2_t test_vld2_dup_u16(uint16_t const * a) {
  return vld2_dup_u16(a);
}

// CHECK-LABEL: define void @test_vld2_dup_u32(%struct.uint32x2x2_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* [[TMP1]], <2 x i32> undef, <2 x i32> undef, i32 0, i32 4)
// CHECK: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <2 x i32>, <2 x i32> } [[VLD_DUP]], <2 x i32> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP4]], <2 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <2 x i32>, <2 x i32> } [[TMP3]], <2 x i32> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32> } [[TMP5]], { <2 x i32>, <2 x i32> }* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false)
// CHECK: ret void
uint32x2x2_t test_vld2_dup_u32(uint32_t const * a) {
  return vld2_dup_u32(a);
}

// CHECK-LABEL: define void @test_vld2_dup_u64(%struct.uint64x1x2_t* noalias sret %agg.result, i64* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64> } [[VLD_DUP]], { <1 x i64>, <1 x i64> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint64x1x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
// CHECK: ret void
uint64x1x2_t test_vld2_dup_u64(uint64_t const * a) {
  return vld2_dup_u64(a);
}

// CHECK-LABEL: define void @test_vld2_dup_s8(%struct.int8x8x2_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0
// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[TMP2]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1
// CHECK: [[TMP5:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8> } [[TMP4]], { <8 x i8>, <8 x i8> }* [[TMP5]]
// CHECK: [[TMP6:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8*
// CHECK: [[TMP7:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP6]], i8* [[TMP7]], i32 16, i32 8, i1 false)
// CHECK: ret void
int8x8x2_t test_vld2_dup_s8(int8_t const * a) {
  return vld2_dup_s8(a);
}

// CHECK-LABEL: define void @test_vld2_dup_s16(%struct.int16x4x2_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[TMP5]], { <4 x i16>, <4 x i16> }* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false)
// CHECK: ret void
int16x4x2_t test_vld2_dup_s16(int16_t const * a) {
  return vld2_dup_s16(a);
}

// CHECK-LABEL: define void @test_vld2_dup_s32(%struct.int32x2x2_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* [[TMP1]], <2 x i32> undef, <2 x i32> undef, i32 0, i32 4)
// CHECK: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <2 x i32>, <2 x i32> } [[VLD_DUP]], <2 x i32> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP4]], <2 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <2 x i32>, <2 x i32> } [[TMP3]], <2 x i32> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32> } [[TMP5]], { <2 x i32>, <2 x i32> }* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false)
// CHECK: ret void
int32x2x2_t test_vld2_dup_s32(int32_t const * a) {
  return vld2_dup_s32(a);
}

// CHECK-LABEL: define void @test_vld2_dup_s64(%struct.int64x1x2_t* noalias sret %agg.result, i64* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64> } [[VLD_DUP]], { <1 x i64>, <1 x i64> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int64x1x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false)
// CHECK: ret void
int64x1x2_t test_vld2_dup_s64(int64_t const * a) {
  return vld2_dup_s64(a);
}

// CHECK-LABEL: define void @test_vld2_dup_f16(%struct.float16x4x2_t* noalias sret %agg.result, half* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[TMP5]], { <4 x i16>, <4 x i16> }* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.float16x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false)
// CHECK: ret void
float16x4x2_t test_vld2_dup_f16(float16_t const * a) {
  return vld2_dup_f16(a);
}

// CHECK-LABEL: define void @test_vld2_dup_f32(%struct.float32x2x2_t* noalias sret %agg.result, float* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2lane.v2f32.p0i8(i8* [[TMP1]], <2 x float> undef, <2 x float> undef, i32 0, i32 4)
// CHECK: [[TMP2:%.*]] = extractvalue { <2 x float>, <2 x float> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <2 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <2 x float>, <2 x float> } [[VLD_DUP]], <2 x float> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP4]], <2 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <2 x float>, <2 x float> } [[TMP3]], <2 x float> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float> } [[TMP5]], { <2 x float>, <2 x float> }* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false)
// CHECK: ret void
float32x2x2_t test_vld2_dup_f32(float32_t const * a) {
  return vld2_dup_f32(a);
}

// CHECK-LABEL: define void @test_vld2_dup_p8(%struct.poly8x8x2_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0
// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[TMP2]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1
// CHECK: [[TMP5:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8> } [[TMP4]], { <8 x i8>, <8 x i8> }* [[TMP5]]
// CHECK: [[TMP6:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8*
// CHECK: [[TMP7:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP6]], i8* [[TMP7]], i32 16, i32 8, i1 false)
// CHECK: ret void
poly8x8x2_t test_vld2_dup_p8(poly8_t const * a) {
  return vld2_dup_p8(a);
}

// CHECK-LABEL: define void @test_vld2_dup_p16(%struct.poly16x4x2_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[TMP5]], { <4 x i16>, <4 x i16> }* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false)
// CHECK: ret void
poly16x4x2_t test_vld2_dup_p16(poly16_t const * a) {
  return vld2_dup_p16(a);
}
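
// vld2(q)_lane reloads a single lane of an existing pair of vectors: the
// aggregate argument arrives coerced as [4 x i64] (or [2 x i64] for the
// 64-bit variants), is spilled and copied into __s1, and the two vectors are
// passed to @llvm.arm.neon.vld2lane together with the lane index. Hedged
// usage sketch (hypothetical, not checked output):
//
//   uint16_t pair[2] = {1, 2};
//   uint16x8x2_t v;
//   v.val[0] = vdupq_n_u16(0);
//   v.val[1] = vdupq_n_u16(0);
//   v = vld2q_lane_u16(pair, v, 7);  // only lane 7 of each vector changes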
@llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) 5406 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* 5407 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* 5408 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 5409 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0 5410 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 5411 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 5412 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 5413 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1 5414 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 5415 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 5416 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 5417 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 5418 // CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP9]], <8 x i16> [[TMP10]], i32 7, i32 2) 5419 // CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16> }* 5420 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_LANE_V]], { <8 x i16>, <8 x i16> }* [[TMP11]] 5421 // CHECK: [[TMP12:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8* 5422 // CHECK: [[TMP13:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* 5423 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 32, i32 16, i1 false) 5424 // CHECK: ret void 5425 uint16x8x2_t test_vld2q_lane_u16(uint16_t const * a, uint16x8x2_t b) { 5426 return vld2q_lane_u16(a, b, 7); 5427 } 5428 5429 // CHECK-LABEL: define void @test_vld2q_lane_u32(%struct.uint32x4x2_t* noalias sret %agg.result, i32* %a, [4 x i64] %b.coerce) #0 { 5430 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16 5431 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16 5432 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16 5433 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0 5434 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]* 5435 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 5436 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8* 5437 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8* 5438 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) 5439 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* 5440 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8* 5441 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 5442 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0 5443 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 5444 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 5445 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 5446 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1 5447 // CHECK: [[TMP7:%.*]] = load <4 x 
i32>, <4 x i32>* [[ARRAYIDX2]], align 16 5448 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> 5449 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 5450 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> 5451 // CHECK: [[VLD2Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32.p0i8(i8* [[TMP4]], <4 x i32> [[TMP9]], <4 x i32> [[TMP10]], i32 3, i32 4) 5452 // CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <4 x i32>, <4 x i32> }* 5453 // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2Q_LANE_V]], { <4 x i32>, <4 x i32> }* [[TMP11]] 5454 // CHECK: [[TMP12:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8* 5455 // CHECK: [[TMP13:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* 5456 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 32, i32 16, i1 false) 5457 // CHECK: ret void 5458 uint32x4x2_t test_vld2q_lane_u32(uint32_t const * a, uint32x4x2_t b) { 5459 return vld2q_lane_u32(a, b, 3); 5460 } 5461 5462 // CHECK-LABEL: define void @test_vld2q_lane_s16(%struct.int16x8x2_t* noalias sret %agg.result, i16* %a, [4 x i64] %b.coerce) #0 { 5463 // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16 5464 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16 5465 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16 5466 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0 5467 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]* 5468 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16 5469 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8* 5470 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8* 5471 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false) 5472 // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* 5473 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* 5474 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 5475 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0 5476 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 5477 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 5478 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 5479 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1 5480 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 5481 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 5482 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 5483 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 5484 // CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP9]], <8 x i16> [[TMP10]], i32 7, i32 2) 5485 // CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16> }* 5486 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_LANE_V]], { <8 x i16>, <8 x i16> }* [[TMP11]] 5487 // CHECK: [[TMP12:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8* 5488 // CHECK: [[TMP13:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* 5489 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 32, i32 16, i1 false) 5490 // 
CHECK: ret void
int16x8x2_t test_vld2q_lane_s16(int16_t const * a, int16x8x2_t b) {
  return vld2q_lane_s16(a, b, 7);
}

// CHECK-LABEL: define void @test_vld2q_lane_s32(%struct.int32x4x2_t* noalias sret %agg.result, i32* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32.p0i8(i8* [[TMP4]], <4 x i32> [[TMP9]], <4 x i32> [[TMP10]], i32 3, i32 4)
// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2Q_LANE_V]], { <4 x i32>, <4 x i32> }* [[TMP11]]
// CHECK: [[TMP12:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8*
// CHECK: [[TMP13:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 32, i32 16, i1 false)
// CHECK: ret void
int32x4x2_t test_vld2q_lane_s32(int32_t const * a, int32x4x2_t b) {
  return vld2q_lane_s32(a, b, 3);
}

// CHECK-LABEL: define void @test_vld2q_lane_f16(%struct.float16x8x2_t* noalias sret %agg.result, half* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP9]], <8 x i16> [[TMP10]], i32 7, i32 2)
// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_LANE_V]], { <8 x i16>, <8 x i16> }* [[TMP11]]
// CHECK: [[TMP12:%.*]] = bitcast %struct.float16x8x2_t* %agg.result to i8*
// CHECK: [[TMP13:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 32, i32 16, i1 false)
// CHECK: ret void
float16x8x2_t test_vld2q_lane_f16(float16_t const * a, float16x8x2_t b) {
  return vld2q_lane_f16(a, b, 7);
}

// CHECK-LABEL: define void @test_vld2q_lane_f32(%struct.float32x4x2_t* noalias sret %agg.result, float* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2lane.v4f32.p0i8(i8* [[TMP4]], <4 x float> [[TMP9]], <4 x float> [[TMP10]], i32 3, i32 4)
// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <4 x float>, <4 x float> }*
// CHECK: store { <4 x float>, <4 x float> } [[VLD2Q_LANE_V]], { <4 x float>, <4 x float> }* [[TMP11]]
// CHECK: [[TMP12:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8*
// CHECK: [[TMP13:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 32, i32 16, i1 false)
// CHECK: ret void
float32x4x2_t test_vld2q_lane_f32(float32_t const * a, float32x4x2_t b) {
  return vld2q_lane_f32(a, b, 3);
}

// CHECK-LABEL: define void @test_vld2q_lane_p16(%struct.poly16x8x2_t* noalias sret %agg.result, i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP9]], <8 x i16> [[TMP10]], i32 7, i32 2)
// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_LANE_V]], { <8 x i16>, <8 x i16> }* [[TMP11]]
// CHECK: [[TMP12:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8*
// CHECK: [[TMP13:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 32, i32 16, i1 false)
// CHECK: ret void
poly16x8x2_t test_vld2q_lane_p16(poly16_t const * a, poly16x8x2_t b) {
  return vld2q_lane_p16(a, b, 7);
}
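
// Usage sketch (illustrative only; hypothetical helper, not part of the
// checked output): vld2q_lane re-loads one interleaved pair from memory into
// lane `n` of both registers of an already-deinterleaved pair. Passing the
// pair struct by value is what produces the [4 x i64] coercion, the allocas,
// and the memcpys the checks above verify around @llvm.arm.neon.vld2lane.
static inline int16x8x2_t refresh_pair_lane7(int16_t const * p, int16x8x2_t v) {
  return vld2q_lane_s16(p, v, 7); // lane index must be a constant in [0,7]
}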

// CHECK-LABEL: define void @test_vld2_lane_u8(%struct.uint8x8x2_t* noalias sret %agg.result, i8* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1)
// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE_V]], { <8 x i8>, <8 x i8> }* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false)
// CHECK: ret void
uint8x8x2_t test_vld2_lane_u8(uint8_t const * a, uint8x8x2_t b) {
  return vld2_lane_u8(a, b, 7);
}

// CHECK-LABEL: define void @test_vld2_lane_u16(%struct.uint16x4x2_t* noalias sret %agg.result, i16* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], i32 3, i32 2)
// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE_V]], { <4 x i16>, <4 x i16> }* [[TMP11]]
// CHECK: [[TMP12:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8*
// CHECK: [[TMP13:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 16, i32 8, i1 false)
// CHECK: ret void
uint16x4x2_t test_vld2_lane_u16(uint16_t const * a, uint16x4x2_t b) {
  return vld2_lane_u16(a, b, 3);
}

// CHECK-LABEL: define void @test_vld2_lane_u32(%struct.uint32x2x2_t* noalias sret %agg.result, i32* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: [[VLD2_LANE_V:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* [[TMP4]], <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], i32 1, i32 4)
// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_LANE_V]], { <2 x i32>, <2 x i32> }* [[TMP11]]
// CHECK: [[TMP12:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8*
// CHECK: [[TMP13:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 16, i32 8, i1 false)
// CHECK: ret void
uint32x2x2_t test_vld2_lane_u32(uint32_t const * a, uint32x2x2_t b) {
  return vld2_lane_u32(a, b, 1);
}

// CHECK-LABEL: define void @test_vld2_lane_s8(%struct.int8x8x2_t* noalias sret %agg.result, i8* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1)
// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE_V]], { <8 x i8>, <8 x i8> }* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false)
// CHECK: ret void
int8x8x2_t test_vld2_lane_s8(int8_t const * a, int8x8x2_t b) {
  return vld2_lane_s8(a, b, 7);
}

// CHECK-LABEL: define void @test_vld2_lane_s16(%struct.int16x4x2_t* noalias sret %agg.result, i16* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], i32 3, i32 2)
// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE_V]], { <4 x i16>, <4 x i16> }* [[TMP11]]
// CHECK: [[TMP12:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8*
// CHECK: [[TMP13:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 16, i32 8, i1 false)
// CHECK: ret void
int16x4x2_t test_vld2_lane_s16(int16_t const * a, int16x4x2_t b) {
  return vld2_lane_s16(a, b, 3);
}

// CHECK-LABEL: define void @test_vld2_lane_s32(%struct.int32x2x2_t* noalias sret %agg.result, i32* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: [[VLD2_LANE_V:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* [[TMP4]], <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], i32 1, i32 4)
// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_LANE_V]], { <2 x i32>, <2 x i32> }* [[TMP11]]
// CHECK: [[TMP12:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8*
// CHECK: [[TMP13:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 16, i32 8, i1 false)
// CHECK: ret void
int32x2x2_t test_vld2_lane_s32(int32_t const * a, int32x2x2_t b) {
  return vld2_lane_s32(a, b, 1);
}

// CHECK-LABEL: define void @test_vld2_lane_f16(%struct.float16x4x2_t* noalias sret %agg.result, half* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x half>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], i32 3, i32 2)
// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE_V]], { <4 x i16>, <4 x i16> }* [[TMP11]]
// CHECK: [[TMP12:%.*]] = bitcast %struct.float16x4x2_t* %agg.result to i8*
// CHECK: [[TMP13:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 16, i32 8, i1 false)
// CHECK: ret void
float16x4x2_t test_vld2_lane_f16(float16_t const * a, float16x4x2_t b) {
  return vld2_lane_f16(a, b, 3);
}

// CHECK-LABEL: define void @test_vld2_lane_f32(%struct.float32x2x2_t* noalias sret %agg.result, float* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x float>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
// CHECK: [[VLD2_LANE_V:%.*]] = call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2lane.v2f32.p0i8(i8* [[TMP4]], <2 x float> [[TMP9]], <2 x float> [[TMP10]], i32 1, i32 4)
// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float> } [[VLD2_LANE_V]], { <2 x float>, <2 x float> }* [[TMP11]]
// CHECK: [[TMP12:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8*
// CHECK: [[TMP13:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 16, i32 8, i1 false)
// CHECK: ret void
float32x2x2_t test_vld2_lane_f32(float32_t const * a, float32x2x2_t b) {
  return vld2_lane_f32(a, b, 1);
}

// CHECK-LABEL: define void @test_vld2_lane_p8(%struct.poly8x8x2_t* noalias sret %agg.result, i8* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1)
// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE_V]], { <8 x i8>, <8 x i8> }* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false)
// CHECK: ret void
poly8x8x2_t test_vld2_lane_p8(poly8_t const * a, poly8x8x2_t b) {
  return vld2_lane_p8(a, b, 7);
}

// CHECK-LABEL: define void @test_vld2_lane_p16(%struct.poly16x4x2_t* noalias sret %agg.result, i16* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], i32 3, i32 2)
// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE_V]], { <4 x i16>, <4 x i16> }* [[TMP11]]
// CHECK: [[TMP12:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8*
// CHECK: [[TMP13:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 16, i32 8, i1 false)
// CHECK: ret void
poly16x4x2_t test_vld2_lane_p16(poly16_t const * a, poly16x4x2_t b) {
  return vld2_lane_p16(a, b, 3);
}
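
// Usage sketch (illustrative only; hypothetical helper, not part of the
// checked output): the non-q forms above operate on 64-bit d registers, so
// the valid lane range is halved (vld2_lane_u32 accepts lanes 0-1 versus
// 0-3 for vld2q_lane_u32). The final i32 argument of each
// @llvm.arm.neon.vld2lane call is the element alignment in bytes, which is
// why the u8, u16, and u32 variants pass 1, 2, and 4 respectively.
static inline uint32x2x2_t refresh_pair_lane1(uint32_t const * p, uint32x2x2_t v) {
  return vld2_lane_u32(p, v, 1); // lane index must be a constant in [0,1]
}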


// CHECK-LABEL: define void @test_vld3q_u8(%struct.uint8x16x3_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
// CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3Q_V]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 48, i32 16, i1 false)
// CHECK: ret void
uint8x16x3_t test_vld3q_u8(uint8_t const * a) {
  return vld3q_u8(a);
}

// CHECK-LABEL: define void @test_vld3q_u16(%struct.uint16x8x3_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x3_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 48, i32 16, i1 false)
// CHECK: ret void
uint16x8x3_t test_vld3q_u16(uint16_t const * a) {
  return vld3q_u16(a);
}

// CHECK-LABEL: define void @test_vld3q_u32(%struct.uint32x4x3_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD3Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3Q_V]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x3_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 48, i32 16, i1 false)
// CHECK: ret void
uint32x4x3_t test_vld3q_u32(uint32_t const * a) {
  return vld3q_u32(a);
}

// CHECK-LABEL: define void @test_vld3q_s8(%struct.int8x16x3_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
// CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3Q_V]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 48, i32 16, i1 false)
// CHECK: ret void
int8x16x3_t test_vld3q_s8(int8_t const * a) {
  return vld3q_s8(a);
}

// CHECK-LABEL: define void @test_vld3q_s16(%struct.int16x8x3_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x3_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 48, i32 16, i1 false)
// CHECK: ret void
int16x8x3_t test_vld3q_s16(int16_t const * a) {
  return vld3q_s16(a);
}

// CHECK-LABEL: define void @test_vld3q_s32(%struct.int32x4x3_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD3Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3Q_V]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x3_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 48, i32 16, i1 false)
// CHECK: ret void
int32x4x3_t test_vld3q_s32(int32_t const * a) {
  return vld3q_s32(a);
}

// CHECK-LABEL: define void @test_vld3q_f16(%struct.float16x8x3_t* noalias sret %agg.result, half* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x3_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 48, i32 16, i1 false)
// CHECK: ret void
float16x8x3_t test_vld3q_f16(float16_t const * a) {
  return vld3q_f16(a);
}

// CHECK-LABEL: define void @test_vld3q_f32(%struct.float32x4x3_t* noalias sret %agg.result, float* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[VLD3Q_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float> }*
// CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3Q_V]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x3_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 48, i32 16, i1 false)
// CHECK: ret void
float32x4x3_t test_vld3q_f32(float32_t const * a) {
  return vld3q_f32(a);
}

// CHECK-LABEL: define void @test_vld3q_p8(%struct.poly8x16x3_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
// CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3Q_V]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 48, i32 16, i1 false)
// CHECK: ret void
poly8x16x3_t test_vld3q_p8(poly8_t const * a) {
  return vld3q_p8(a);
}

// CHECK-LABEL: define void @test_vld3q_p16(%struct.poly16x8x3_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x3_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 48, i32 16, i1 false)
// CHECK: ret void
poly16x8x3_t test_vld3q_p16(poly16_t const * a) {
  return vld3q_p16(a);
}
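
// Usage sketch (illustrative only; hypothetical helper, not part of the
// checked output): vld3q performs a 3-way deinterleaving load, e.g. splitting
// packed RGB bytes into one q register per channel. The aggregate return
// value is why each function above stores a { <16 x i8>, ... } literal
// struct and then memcpys 48 bytes into the sret result pointer.
static inline uint8x16_t rgb_channel_sum(uint8_t const * rgb) {
  uint8x16x3_t v = vld3q_u8(rgb); // v.val[0]=R, v.val[1]=G, v.val[2]=B
  return vaddq_u8(vaddq_u8(v.val[0], v.val[1]), v.val[2]);
}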

// CHECK-LABEL: define void @test_vld3_u8(%struct.uint8x8x3_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_V]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 24, i32 8, i1 false)
// CHECK: ret void
uint8x8x3_t test_vld3_u8(uint8_t const * a) {
  return vld3_u8(a);
}

// CHECK-LABEL: define void @test_vld3_u16(%struct.uint16x4x3_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x3_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
// CHECK: ret void
uint16x4x3_t test_vld3_u16(uint16_t const * a) {
  return vld3_u16(a);
}

// CHECK-LABEL: define void @test_vld3_u32(%struct.uint32x2x3_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD3_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_V]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x3_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
// CHECK: ret void
uint32x2x3_t test_vld3_u32(uint32_t const * a) {
  return vld3_u32(a);
}

// CHECK-LABEL: define void @test_vld3_u64(%struct.uint64x1x3_t* noalias sret %agg.result, i64* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD3_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_V]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint64x1x3_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
// CHECK: ret void
uint64x1x3_t test_vld3_u64(uint64_t const * a) {
  return vld3_u64(a);
}

// CHECK-LABEL: define void @test_vld3_s8(%struct.int8x8x3_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_V]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 24, i32 8, i1 false)
// CHECK: ret void
int8x8x3_t test_vld3_s8(int8_t const * a) {
  return vld3_s8(a);
}

// CHECK-LABEL: define void @test_vld3_s16(%struct.int16x4x3_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x3_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
// CHECK: ret void
int16x4x3_t test_vld3_s16(int16_t const * a) {
  return vld3_s16(a);
}

// CHECK-LABEL: define void @test_vld3_s32(%struct.int32x2x3_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD3_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_V]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x3_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
// CHECK: ret void
int32x2x3_t test_vld3_s32(int32_t const * a) {
  return vld3_s32(a);
}

// CHECK-LABEL: define void @test_vld3_s64(%struct.int64x1x3_t* noalias sret %agg.result, i64* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD3_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_V]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int64x1x3_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
// CHECK: ret void
int64x1x3_t test_vld3_s64(int64_t const * a) {
  return vld3_s64(a);
}

// CHECK-LABEL: define void @test_vld3_f16(%struct.float16x4x3_t* noalias sret %agg.result, half* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x3_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
// CHECK: ret void
float16x4x3_t test_vld3_f16(float16_t const * a) {
  return vld3_f16(a);
}

// CHECK-LABEL: define void @test_vld3_f32(%struct.float32x2x3_t* noalias sret %agg.result, float* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[VLD3_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3.v2f32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3_V]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x3_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
// CHECK: ret void
float32x2x3_t test_vld3_f32(float32_t const * a) {
  return vld3_f32(a);
}

// CHECK-LABEL: define void @test_vld3_p8(%struct.poly8x8x3_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_V]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 24, i32 8, i1 false)
// CHECK: ret void
poly8x8x3_t test_vld3_p8(poly8_t const * a) {
  return vld3_p8(a);
}

// CHECK-LABEL: define void @test_vld3_p16(%struct.poly16x4x3_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x3_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
// CHECK: ret void
poly16x4x3_t test_vld3_p16(poly16_t const * a) {
  return vld3_p16(a);
}
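
// Usage sketch (illustrative only; hypothetical helper, not part of the
// checked output): vld3_dup loads one 3-element structure and broadcasts
// each element across all lanes of its result register; the
// shufflevector-with-zeroinitializer lines checked below are that broadcast.
// For <1 x i64> results there is only one lane, so the dup forms below lower
// to the same @llvm.arm.neon.vld3 call as a plain vld3.
static inline int16x4x3_t broadcast_triplet(int16_t const * p) {
  return vld3_dup_s16(p); // val[k] holds four copies of p[k]
}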
%struct.poly8x8x3_t* [[__RET]] to i8* 6241 // CHECK: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8.p0i8(i8* %a, i32 1) 6242 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* 6243 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_V]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] 6244 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* %agg.result to i8* 6245 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* 6246 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 24, i32 8, i1 false) 6247 // CHECK: ret void 6248 poly8x8x3_t test_vld3_p8(poly8_t const * a) { 6249 return vld3_p8(a); 6250 } 6251 6252 // CHECK-LABEL: define void @test_vld3_p16(%struct.poly16x4x3_t* noalias sret %agg.result, i16* %a) #0 { 6253 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8 6254 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* 6255 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 6256 // CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16.p0i8(i8* [[TMP1]], i32 2) 6257 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* 6258 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]] 6259 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x3_t* %agg.result to i8* 6260 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* 6261 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false) 6262 // CHECK: ret void 6263 poly16x4x3_t test_vld3_p16(poly16_t const * a) { 6264 return vld3_p16(a); 6265 } 6266 6267 6268 // CHECK-LABEL: define void @test_vld3_dup_u8(%struct.uint8x8x3_t* noalias sret %agg.result, i8* %a) #0 { 6269 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8 6270 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* 6271 // CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) 6272 // CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], 0 6273 // CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer 6274 // CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0 6275 // CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], 1 6276 // CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer 6277 // CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1 6278 // CHECK: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2 6279 // CHECK: [[LANE2:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP5]], <8 x i32> zeroinitializer 6280 // CHECK: [[TMP6:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], <8 x i8> [[LANE2]], 2 6281 // CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* 6282 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]] 6283 // CHECK: [[TMP8:%.*]] = bitcast %struct.uint8x8x3_t* %agg.result to i8* 6284 // CHECK: [[TMP9:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* 6285 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP8]], i8* [[TMP9]], i32 24, i32 8, i1 false) 6286 // CHECK: 
ret void 6287 uint8x8x3_t test_vld3_dup_u8(uint8_t const * a) { 6288 return vld3_dup_u8(a); 6289 } 6290 6291 // CHECK-LABEL: define void @test_vld3_dup_u16(%struct.uint16x4x3_t* noalias sret %agg.result, i16* %a) #0 { 6292 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8 6293 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* 6294 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* 6295 // CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) 6296 // CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0 6297 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer 6298 // CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0 6299 // CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1 6300 // CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer 6301 // CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1 6302 // CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2 6303 // CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer 6304 // CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2 6305 // CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* 6306 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP8]] 6307 // CHECK: [[TMP9:%.*]] = bitcast %struct.uint16x4x3_t* %agg.result to i8* 6308 // CHECK: [[TMP10:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* 6309 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 24, i32 8, i1 false) 6310 // CHECK: ret void 6311 uint16x4x3_t test_vld3_dup_u16(uint16_t const * a) { 6312 return vld3_dup_u16(a); 6313 } 6314 6315 // CHECK-LABEL: define void @test_vld3_dup_u32(%struct.uint32x2x3_t* noalias sret %agg.result, i32* %a) #0 { 6316 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8 6317 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* 6318 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* 6319 // CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32.p0i8(i8* [[TMP1]], <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4) 6320 // CHECK: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], 0 6321 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer 6322 // CHECK: [[TMP3:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], <2 x i32> [[LANE]], 0 6323 // CHECK: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], 1 6324 // CHECK: [[LANE1:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP4]], <2 x i32> zeroinitializer 6325 // CHECK: [[TMP5:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], <2 x i32> [[LANE1]], 1 6326 // CHECK: [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], 2 6327 // CHECK: [[LANE2:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP6]], <2 x i32> zeroinitializer 6328 // CHECK: [[TMP7:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } 
[[TMP5]], <2 x i32> [[LANE2]], 2 6329 // CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }* 6330 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP7]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP8]] 6331 // CHECK: [[TMP9:%.*]] = bitcast %struct.uint32x2x3_t* %agg.result to i8* 6332 // CHECK: [[TMP10:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* 6333 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 24, i32 8, i1 false) 6334 // CHECK: ret void 6335 uint32x2x3_t test_vld3_dup_u32(uint32_t const * a) { 6336 return vld3_dup_u32(a); 6337 } 6338 6339 // CHECK-LABEL: define void @test_vld3_dup_u64(%struct.uint64x1x3_t* noalias sret %agg.result, i64* %a) #0 { 6340 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8 6341 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* 6342 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* 6343 // CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64.p0i8(i8* [[TMP1]], i32 4) 6344 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }* 6345 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD_DUP]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]] 6346 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint64x1x3_t* %agg.result to i8* 6347 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* 6348 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false) 6349 // CHECK: ret void 6350 uint64x1x3_t test_vld3_dup_u64(uint64_t const * a) { 6351 return vld3_dup_u64(a); 6352 } 6353 6354 // CHECK-LABEL: define void @test_vld3_dup_s8(%struct.int8x8x3_t* noalias sret %agg.result, i8* %a) #0 { 6355 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8 6356 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* 6357 // CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) 6358 // CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], 0 6359 // CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer 6360 // CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0 6361 // CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], 1 6362 // CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer 6363 // CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1 6364 // CHECK: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2 6365 // CHECK: [[LANE2:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP5]], <8 x i32> zeroinitializer 6366 // CHECK: [[TMP6:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], <8 x i8> [[LANE2]], 2 6367 // CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* 6368 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]] 6369 // CHECK: [[TMP8:%.*]] = bitcast %struct.int8x8x3_t* %agg.result to i8* 6370 // CHECK: [[TMP9:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* 6371 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP8]], i8* [[TMP9]], i32 24, i32 8, i1 false) 6372 // CHECK: ret void 6373 int8x8x3_t test_vld3_dup_s8(int8_t const * a) { 6374 return vld3_dup_s8(a); 

// CHECK-LABEL: define void @test_vld3_dup_s8(%struct.int8x8x3_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0
// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1
// CHECK: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP5]], <8 x i32> zeroinitializer
// CHECK: [[TMP6:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], <8 x i8> [[LANE2]], 2
// CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]]
// CHECK: [[TMP8:%.*]] = bitcast %struct.int8x8x3_t* %agg.result to i8*
// CHECK: [[TMP9:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP8]], i8* [[TMP9]], i32 24, i32 8, i1 false)
// CHECK: ret void
int8x8x3_t test_vld3_dup_s8(int8_t const * a) {
  return vld3_dup_s8(a);
}

// CHECK-LABEL: define void @test_vld3_dup_s16(%struct.int16x4x3_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer
// CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2
// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP8]]
// CHECK: [[TMP9:%.*]] = bitcast %struct.int16x4x3_t* %agg.result to i8*
// CHECK: [[TMP10:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 24, i32 8, i1 false)
// CHECK: ret void
int16x4x3_t test_vld3_dup_s16(int16_t const * a) {
  return vld3_dup_s16(a);
}

// CHECK-LABEL: define void @test_vld3_dup_s32(%struct.int32x2x3_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32.p0i8(i8* [[TMP1]], <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4)
// CHECK: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], <2 x i32> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP4]], <2 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], <2 x i32> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP6]], <2 x i32> zeroinitializer
// CHECK: [[TMP7:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], <2 x i32> [[LANE2]], 2
// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP7]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP8]]
// CHECK: [[TMP9:%.*]] = bitcast %struct.int32x2x3_t* %agg.result to i8*
// CHECK: [[TMP10:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 24, i32 8, i1 false)
// CHECK: ret void
int32x2x3_t test_vld3_dup_s32(int32_t const * a) {
  return vld3_dup_s32(a);
}

// CHECK-LABEL: define void @test_vld3_dup_s64(%struct.int64x1x3_t* noalias sret %agg.result, i64* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD_DUP]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int64x1x3_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false)
// CHECK: ret void
int64x1x3_t test_vld3_dup_s64(int64_t const * a) {
  return vld3_dup_s64(a);
}

// CHECK-LABEL: define void @test_vld3_dup_f16(%struct.float16x4x3_t* noalias sret %agg.result, half* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer
// CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2
// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP8]]
// CHECK: [[TMP9:%.*]] = bitcast %struct.float16x4x3_t* %agg.result to i8*
// CHECK: [[TMP10:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 24, i32 8, i1 false)
// CHECK: ret void
float16x4x3_t test_vld3_dup_f16(float16_t const * a) {
  return vld3_dup_f16(a);
}

// CHECK-LABEL: define void @test_vld3_dup_f32(%struct.float32x2x3_t* noalias sret %agg.result, float* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32.p0i8(i8* [[TMP1]], <2 x float> undef, <2 x float> undef, <2 x float> undef, i32 0, i32 4)
// CHECK: [[TMP2:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <2 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <2 x float>, <2 x float>, <2 x float> } [[VLD_DUP]], <2 x float> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP4]], <2 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <2 x float>, <2 x float>, <2 x float> } [[TMP3]], <2 x float> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float> } [[TMP5]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP6]], <2 x i32> zeroinitializer
// CHECK: [[TMP7:%.*]] = insertvalue { <2 x float>, <2 x float>, <2 x float> } [[TMP5]], <2 x float> [[LANE2]], 2
// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[TMP7]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP8]]
// CHECK: [[TMP9:%.*]] = bitcast %struct.float32x2x3_t* %agg.result to i8*
// CHECK: [[TMP10:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 24, i32 8, i1 false)
// CHECK: ret void
float32x2x3_t test_vld3_dup_f32(float32_t const * a) {
  return vld3_dup_f32(a);
}

// CHECK-LABEL: define void @test_vld3_dup_p8(%struct.poly8x8x3_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0
// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1
// CHECK: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP5]], <8 x i32> zeroinitializer
// CHECK: [[TMP6:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], <8 x i8> [[LANE2]], 2
// CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]]
// CHECK: [[TMP8:%.*]] = bitcast %struct.poly8x8x3_t* %agg.result to i8*
// CHECK: [[TMP9:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP8]], i8* [[TMP9]], i32 24, i32 8, i1 false)
// CHECK: ret void
poly8x8x3_t test_vld3_dup_p8(poly8_t const * a) {
  return vld3_dup_p8(a);
}

// CHECK-LABEL: define void @test_vld3_dup_p16(%struct.poly16x4x3_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer
// CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2
// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP8]]
// CHECK: [[TMP9:%.*]] = bitcast %struct.poly16x4x3_t* %agg.result to i8*
// CHECK: [[TMP10:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 24, i32 8, i1 false)
// CHECK: ret void
poly16x4x3_t test_vld3_dup_p16(poly16_t const * a) {
  return vld3_dup_p16(a);
}

// CHECK-LABEL: define void @test_vld3q_lane_u16(%struct.uint16x8x3_t* noalias sret %agg.result, i16* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], i32 7, i32 2)
// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP14]]
// CHECK: [[TMP15:%.*]] = bitcast %struct.uint16x8x3_t* %agg.result to i8*
// CHECK: [[TMP16:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 48, i32 16, i1 false)
// CHECK: ret void
uint16x8x3_t test_vld3q_lane_u16(uint16_t const * a, uint16x8x3_t b) {
  return vld3q_lane_u16(a, b, 7);
}

// CHECK-LABEL: define void @test_vld3q_lane_u32(%struct.uint32x4x3_t* noalias sret %agg.result, i32* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32.p0i8(i8* [[TMP4]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], i32 3, i32 4)
// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3Q_LANE_V]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP14]]
// CHECK: [[TMP15:%.*]] = bitcast %struct.uint32x4x3_t* %agg.result to i8*
// CHECK: [[TMP16:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 48, i32 16, i1 false)
// CHECK: ret void
uint32x4x3_t test_vld3q_lane_u32(uint32_t const * a, uint32x4x3_t b) {
  return vld3q_lane_u32(a, b, 3);
}

// CHECK-LABEL: define void @test_vld3q_lane_s16(%struct.int16x8x3_t* noalias sret %agg.result, i16* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], i32 7, i32 2)
// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP14]]
// CHECK: [[TMP15:%.*]] = bitcast %struct.int16x8x3_t* %agg.result to i8*
// CHECK: [[TMP16:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 48, i32 16, i1 false)
// CHECK: ret void
int16x8x3_t test_vld3q_lane_s16(int16_t const * a, int16x8x3_t b) {
  return vld3q_lane_s16(a, b, 7);
}

// CHECK-LABEL: define void @test_vld3q_lane_s32(%struct.int32x4x3_t* noalias sret %agg.result, i32* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32.p0i8(i8* [[TMP4]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], i32 3, i32 4)
// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3Q_LANE_V]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP14]]
// CHECK: [[TMP15:%.*]] = bitcast %struct.int32x4x3_t* %agg.result to i8*
// CHECK: [[TMP16:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 48, i32 16, i1 false)
// CHECK: ret void
int32x4x3_t test_vld3q_lane_s32(int32_t const * a, int32x4x3_t b) {
  return vld3q_lane_s32(a, b, 3);
}

// CHECK-LABEL: define void @test_vld3q_lane_f16(%struct.float16x8x3_t* noalias sret %agg.result, half* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], i32 7, i32 2)
// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP14]]
// CHECK: [[TMP15:%.*]] = bitcast %struct.float16x8x3_t* %agg.result to i8*
// CHECK: [[TMP16:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 48, i32 16, i1 false)
// CHECK: ret void
float16x8x3_t test_vld3q_lane_f16(float16_t const * a, float16x8x3_t b) {
  return vld3q_lane_f16(a, b, 7);
}

// CHECK-LABEL: define void @test_vld3q_lane_f32(%struct.float32x4x3_t* noalias sret %agg.result, float* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3lane.v4f32.p0i8(i8* [[TMP4]], <4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], i32 3, i32 4)
// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <4 x float>, <4 x float>, <4 x float> }*
// CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3Q_LANE_V]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP14]]
// CHECK: [[TMP15:%.*]] = bitcast %struct.float32x4x3_t* %agg.result to i8*
// CHECK: [[TMP16:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 48, i32 16, i1 false)
// CHECK: ret void
float32x4x3_t test_vld3q_lane_f32(float32_t const * a, float32x4x3_t b) {
  return vld3q_lane_f32(a, b, 3);
}

// CHECK-LABEL: define void @test_vld3q_lane_p16(%struct.poly16x8x3_t* noalias sret %agg.result, i16* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], i32 7, i32 2)
// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP14]]
// CHECK: [[TMP15:%.*]] = bitcast %struct.poly16x8x3_t* %agg.result to i8*
// CHECK: [[TMP16:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 48, i32 16, i1 false)
// CHECK: ret void
poly16x8x3_t test_vld3q_lane_p16(poly16_t const * a, poly16x8x3_t b) {
  return vld3q_lane_p16(a, b, 7);
}
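
// Editorial note: vld3q_lane_<t>(a, b, n) reloads only lane n of each of the
// three Q vectors in b from three consecutive elements at a; the [6 x i64]
// coercion and memcpy traffic above is just the APCS indirect passing of the
// 48-byte x3 aggregate. A minimal usage sketch; the helper below is
// illustrative only and not part of the checked tests:
static inline int16x8x3_t vld3q_lane_s16_patch7(const int16_t *p,
                                                int16x8x3_t v) {
  // Overwrites lane 7 of v.val[0..2] with p[0..2]; all other lanes are kept.
  return vld3q_lane_s16(p, v, 7);
}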

// CHECK-LABEL: define void @test_vld3_lane_u8(%struct.uint8x8x3_t* noalias sret %agg.result, i8* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
// CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE_V]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]]
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint8x8x3_t* %agg.result to i8*
// CHECK: [[TMP9:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP8]], i8* [[TMP9]], i32 24, i32 8, i1 false)
// CHECK: ret void
uint8x8x3_t test_vld3_lane_u8(uint8_t const * a, uint8x8x3_t b) {
  return vld3_lane_u8(a, b, 7);
}

// CHECK-LABEL: define void @test_vld3_lane_u16(%struct.uint16x4x3_t* noalias sret %agg.result, i16* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], i32 3, i32 2)
// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP14]]
// CHECK: [[TMP15:%.*]] = bitcast %struct.uint16x4x3_t* %agg.result to i8*
// CHECK: [[TMP16:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 24, i32 8, i1 false)
// CHECK: ret void
uint16x4x3_t test_vld3_lane_u16(uint16_t const * a, uint16x4x3_t b) {
  return vld3_lane_u16(a, b, 3);
}

// CHECK-LABEL: define void @test_vld3_lane_u32(%struct.uint32x2x3_t* noalias sret %agg.result, i32* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK: [[VLD3_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32.p0i8(i8* [[TMP4]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], i32 1, i32 4)
// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE_V]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP14]]
// CHECK: [[TMP15:%.*]] = bitcast %struct.uint32x2x3_t* %agg.result to i8*
// CHECK: [[TMP16:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 24, i32 8, i1 false)
// CHECK: ret void
uint32x2x3_t test_vld3_lane_u32(uint32_t const * a, uint32x2x3_t b) {
  return vld3_lane_u32(a, b, 1);
}

// CHECK-LABEL: define void @test_vld3_lane_s8(%struct.int8x8x3_t* noalias sret %agg.result, i8* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
// CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE_V]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]]
// CHECK: [[TMP8:%.*]] = bitcast %struct.int8x8x3_t* %agg.result to i8*
// CHECK: [[TMP9:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP8]], i8* [[TMP9]], i32 24, i32 8, i1 false)
// CHECK: ret void
int8x8x3_t test_vld3_lane_s8(int8_t const * a, int8x8x3_t b) {
  return vld3_lane_s8(a, b, 7);
}

// CHECK-LABEL: define void @test_vld3_lane_s16(%struct.int16x4x3_t* noalias sret %agg.result, i16* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], i32 3, i32 2)
// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP14]]
// CHECK: [[TMP15:%.*]] = bitcast %struct.int16x4x3_t* %agg.result to i8*
// CHECK: [[TMP16:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 24, i32 8, i1 false)
// CHECK: ret void
int16x4x3_t test_vld3_lane_s16(int16_t const * a, int16x4x3_t b) {
  return vld3_lane_s16(a, b, 3);
}

// CHECK-LABEL: define void @test_vld3_lane_s32(%struct.int32x2x3_t* noalias sret %agg.result, i32* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK: [[VLD3_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32.p0i8(i8* [[TMP4]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], i32 1, i32 4)
// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE_V]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP14]]
// CHECK: [[TMP15:%.*]] = bitcast %struct.int32x2x3_t* %agg.result to i8*
// CHECK: [[TMP16:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 24, i32 8, i1 false)
// CHECK: ret void
int32x2x3_t test_vld3_lane_s32(int32_t const * a, int32x2x3_t b) {
  return vld3_lane_s32(a, b, 1);
}

// CHECK-LABEL: define void @test_vld3_lane_f16(%struct.float16x4x3_t* noalias sret %agg.result, half* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x half>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], i32 3, i32 2)
// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP14]]
// CHECK: [[TMP15:%.*]] = bitcast %struct.float16x4x3_t* %agg.result to i8*
// CHECK: [[TMP16:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 24, i32 8, i1 false)
// CHECK: ret void
float16x4x3_t test_vld3_lane_f16(float16_t const * a, float16x4x3_t b) {
  return vld3_lane_f16(a, b, 3);
}

// CHECK-LABEL: define void @test_vld3_lane_f32(%struct.float32x2x3_t* noalias sret %agg.result, float* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x float>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
// CHECK: [[VLD3_LANE_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32.p0i8(i8* [[TMP4]], <2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], i32 1, i32 4)
// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <2 x float>, <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3_LANE_V]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP14]]
// CHECK: [[TMP15:%.*]] = bitcast %struct.float32x2x3_t* %agg.result to i8*
// CHECK: [[TMP16:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 24, i32 8, i1 false)
// CHECK: ret void
float32x2x3_t test_vld3_lane_f32(float32_t const * a, float32x2x3_t b) {
  return vld3_lane_f32(a, b, 1);
}
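
// Editorial note: the D-register forms above take the same path with a
// [3 x i64] coercion, and the lane immediate must be a constant below the
// lane count (0..1 for <2 x float>, as in the test just checked). A minimal
// usage sketch; the helper below is illustrative only and not part of the
// checked tests:
static inline float32x2x3_t vld3_lane_f32_patch1(const float *p,
                                                 float32x2x3_t v) {
  // Overwrites lane 1 of v.val[0..2] with p[0..2]; lane 0 is kept.
  return vld3_lane_f32(p, v, 1);
}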
@test_vld3_lane_p8(%struct.poly8x8x3_t* noalias sret %agg.result, i8* %a, [3 x i64] %b.coerce) #0 { 7093 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8 7094 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8 7095 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8 7096 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0 7097 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]* 7098 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 7099 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8* 7100 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8* 7101 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 7102 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* 7103 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 7104 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0 7105 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 7106 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 7107 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1 7108 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 7109 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 7110 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2 7111 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 7112 // CHECK: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1) 7113 // CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8>, <8 x i8> }* 7114 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE_V]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]] 7115 // CHECK: [[TMP8:%.*]] = bitcast %struct.poly8x8x3_t* %agg.result to i8* 7116 // CHECK: [[TMP9:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* 7117 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP8]], i8* [[TMP9]], i32 24, i32 8, i1 false) 7118 // CHECK: ret void 7119 poly8x8x3_t test_vld3_lane_p8(poly8_t const * a, poly8x8x3_t b) { 7120 return vld3_lane_p8(a, b, 7); 7121 } 7122 7123 // CHECK-LABEL: define void @test_vld3_lane_p16(%struct.poly16x4x3_t* noalias sret %agg.result, i16* %a, [3 x i64] %b.coerce) #0 { 7124 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8 7125 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8 7126 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8 7127 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0 7128 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]* 7129 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 7130 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8* 7131 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8* 7132 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 7133 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x3_t* 
[[__RET]] to i8* 7134 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* 7135 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 7136 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0 7137 // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 7138 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 7139 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 7140 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1 7141 // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 7142 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 7143 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 7144 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2 7145 // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 7146 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8> 7147 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 7148 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 7149 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> 7150 // CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], i32 3, i32 2) 7151 // CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16> }* 7152 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP14]] 7153 // CHECK: [[TMP15:%.*]] = bitcast %struct.poly16x4x3_t* %agg.result to i8* 7154 // CHECK: [[TMP16:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* 7155 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 24, i32 8, i1 false) 7156 // CHECK: ret void 7157 poly16x4x3_t test_vld3_lane_p16(poly16_t const * a, poly16x4x3_t b) { 7158 return vld3_lane_p16(a, b, 3); 7159 } 7160 7161 7162 // CHECK-LABEL: define void @test_vld4q_u8(%struct.uint8x16x4_t* noalias sret %agg.result, i8* %a) #0 { 7163 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16 7164 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* 7165 // CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8.p0i8(i8* %a, i32 1) 7166 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* 7167 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4Q_V]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] 7168 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* %agg.result to i8* 7169 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* 7170 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 64, i32 16, i1 false) 7171 // CHECK: ret void 7172 uint8x16x4_t test_vld4q_u8(uint8_t const * a) { 7173 return vld4q_u8(a); 7174 } 7175 7176 // CHECK-LABEL: define void @test_vld4q_u16(%struct.uint16x8x4_t* noalias sret %agg.result, i16* %a) #0 { 7177 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16 7178 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* 7179 // CHECK: [[TMP1:%.*]] = 
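// Note (added annotation): the vld4q/vld4 tests below check that each
// four-vector structured load lowers to a single @llvm.arm.neon.vld4 call,
// with the element alignment as the trailing i32 argument, and that the
// aggregate result is stored to a local and copied to the sret pointer
// with @llvm.memcpy.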
// CHECK-LABEL: define void @test_vld4q_u8(%struct.uint8x16x4_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4Q_V]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 64, i32 16, i1 false)
// CHECK: ret void
uint8x16x4_t test_vld4q_u8(uint8_t const * a) {
  return vld4q_u8(a);
}

// CHECK-LABEL: define void @test_vld4q_u16(%struct.uint16x8x4_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 64, i32 16, i1 false)
// CHECK: ret void
uint16x8x4_t test_vld4q_u16(uint16_t const * a) {
  return vld4q_u16(a);
}

// CHECK-LABEL: define void @test_vld4q_u32(%struct.uint32x4x4_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_V]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 64, i32 16, i1 false)
// CHECK: ret void
uint32x4x4_t test_vld4q_u32(uint32_t const * a) {
  return vld4q_u32(a);
}

// CHECK-LABEL: define void @test_vld4q_s8(%struct.int8x16x4_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4Q_V]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 64, i32 16, i1 false)
// CHECK: ret void
int8x16x4_t test_vld4q_s8(int8_t const * a) {
  return vld4q_s8(a);
}

// CHECK-LABEL: define void @test_vld4q_s16(%struct.int16x8x4_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 64, i32 16, i1 false)
// CHECK: ret void
int16x8x4_t test_vld4q_s16(int16_t const * a) {
  return vld4q_s16(a);
}

// CHECK-LABEL: define void @test_vld4q_s32(%struct.int32x4x4_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }*
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_V]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 64, i32 16, i1 false)
// CHECK: ret void
int32x4x4_t test_vld4q_s32(int32_t const * a) {
  return vld4q_s32(a);
}

// CHECK-LABEL: define void @test_vld4q_f16(%struct.float16x8x4_t* noalias sret %agg.result, half* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 64, i32 16, i1 false)
// CHECK: ret void
float16x8x4_t test_vld4q_f16(float16_t const * a) {
  return vld4q_f16(a);
}

// CHECK-LABEL: define void @test_vld4q_f32(%struct.float32x4x4_t* noalias sret %agg.result, float* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }*
// CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4Q_V]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 64, i32 16, i1 false)
// CHECK: ret void
float32x4x4_t test_vld4q_f32(float32_t const * a) {
  return vld4q_f32(a);
}

// CHECK-LABEL: define void @test_vld4q_p8(%struct.poly8x16x4_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4Q_V]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 64, i32 16, i1 false)
// CHECK: ret void
poly8x16x4_t test_vld4q_p8(poly8_t const * a) {
  return vld4q_p8(a);
}

// CHECK-LABEL: define void @test_vld4q_p16(%struct.poly16x8x4_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 64, i32 16, i1 false)
// CHECK: ret void
poly16x8x4_t test_vld4q_p16(poly16_t const * a) {
  return vld4q_p16(a);
}

// CHECK-LABEL: define void @test_vld4_u8(%struct.uint8x8x4_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_V]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint8x8x4_t test_vld4_u8(uint8_t const * a) {
  return vld4_u8(a);
}

// CHECK-LABEL: define void @test_vld4_u16(%struct.uint16x4x4_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint16x4x4_t test_vld4_u16(uint16_t const * a) {
  return vld4_u16(a);
}

// CHECK-LABEL: define void @test_vld4_u32(%struct.uint32x2x4_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD4_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_V]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint32x2x4_t test_vld4_u32(uint32_t const * a) {
  return vld4_u32(a);
}

// CHECK-LABEL: define void @test_vld4_u64(%struct.uint64x1x4_t* noalias sret %agg.result, i64* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD4_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_V]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint64x1x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint64x1x4_t test_vld4_u64(uint64_t const * a) {
  return vld4_u64(a);
}

// CHECK-LABEL: define void @test_vld4_s8(%struct.int8x8x4_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_V]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 32, i32 8, i1 false)
// CHECK: ret void
int8x8x4_t test_vld4_s8(int8_t const * a) {
  return vld4_s8(a);
}

// CHECK-LABEL: define void @test_vld4_s16(%struct.int16x4x4_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
int16x4x4_t test_vld4_s16(int16_t const * a) {
  return vld4_s16(a);
}

// CHECK-LABEL: define void @test_vld4_s32(%struct.int32x2x4_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD4_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_V]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
int32x2x4_t test_vld4_s32(int32_t const * a) {
  return vld4_s32(a);
}

// CHECK-LABEL: define void @test_vld4_s64(%struct.int64x1x4_t* noalias sret %agg.result, i64* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD4_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_V]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int64x1x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
int64x1x4_t test_vld4_s64(int64_t const * a) {
  return vld4_s64(a);
}

// CHECK-LABEL: define void @test_vld4_f16(%struct.float16x4x4_t* noalias sret %agg.result, half* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
float16x4x4_t test_vld4_f16(float16_t const * a) {
  return vld4_f16(a);
}

// CHECK-LABEL: define void @test_vld4_f32(%struct.float32x2x4_t* noalias sret %agg.result, float* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[VLD4_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4.v2f32.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4_V]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
float32x2x4_t test_vld4_f32(float32_t const * a) {
  return vld4_f32(a);
}

// CHECK-LABEL: define void @test_vld4_p8(%struct.poly8x8x4_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8.p0i8(i8* %a, i32 1)
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_V]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]]
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* %agg.result to i8*
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 32, i32 8, i1 false)
// CHECK: ret void
poly8x8x4_t test_vld4_p8(poly8_t const * a) {
  return vld4_p8(a);
}

// CHECK-LABEL: define void @test_vld4_p16(%struct.poly16x4x4_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16.p0i8(i8* [[TMP1]], i32 2)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
poly16x4x4_t test_vld4_p16(poly16_t const * a) {
  return vld4_p16(a);
}

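// Note (added annotation): the vld4_dup tests below expect a
// @llvm.arm.neon.vld4lane load of lane 0 followed by a shufflevector splat
// of each of the four result vectors. The 64-bit variants
// (vld4_dup_u64/vld4_dup_s64) should lower to a plain @llvm.arm.neon.vld4
// instead, since a <1 x i64> vector has only lane 0.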
// CHECK-LABEL: define void @test_vld4_dup_u8(%struct.uint8x8x4_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0
// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1
// CHECK: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP5]], <8 x i32> zeroinitializer
// CHECK: [[TMP6:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], <8 x i8> [[LANE2]], 2
// CHECK: [[TMP7:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], 3
// CHECK: [[LANE3:%.*]] = shufflevector <8 x i8> [[TMP7]], <8 x i8> [[TMP7]], <8 x i32> zeroinitializer
// CHECK: [[TMP8:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], <8 x i8> [[LANE3]], 3
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP8]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP9]]
// CHECK: [[TMP10:%.*]] = bitcast %struct.uint8x8x4_t* %agg.result to i8*
// CHECK: [[TMP11:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP10]], i8* [[TMP11]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint8x8x4_t test_vld4_dup_u8(uint8_t const * a) {
  return vld4_dup_u8(a);
}

// CHECK-LABEL: define void @test_vld4_dup_u16(%struct.uint16x4x4_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer
// CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2
// CHECK: [[TMP8:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], 3
// CHECK: [[LANE3:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP8]], <4 x i32> zeroinitializer
// CHECK: [[TMP9:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], <4 x i16> [[LANE3]], 3
// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP9]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP10]]
// CHECK: [[TMP11:%.*]] = bitcast %struct.uint16x4x4_t* %agg.result to i8*
// CHECK: [[TMP12:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP11]], i8* [[TMP12]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint16x4x4_t test_vld4_dup_u16(uint16_t const * a) {
  return vld4_dup_u16(a);
}

// CHECK-LABEL: define void @test_vld4_dup_u32(%struct.uint32x2x4_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* [[TMP1]], <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4)
// CHECK: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], <2 x i32> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP4]], <2 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], <2 x i32> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP6]], <2 x i32> zeroinitializer
// CHECK: [[TMP7:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], <2 x i32> [[LANE2]], 2
// CHECK: [[TMP8:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP7]], 3
// CHECK: [[LANE3:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> [[TMP8]], <2 x i32> zeroinitializer
// CHECK: [[TMP9:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP7]], <2 x i32> [[LANE3]], 3
// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP9]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP10]]
// CHECK: [[TMP11:%.*]] = bitcast %struct.uint32x2x4_t* %agg.result to i8*
// CHECK: [[TMP12:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP11]], i8* [[TMP12]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint32x2x4_t test_vld4_dup_u32(uint32_t const * a) {
  return vld4_dup_u32(a);
}

// CHECK-LABEL: define void @test_vld4_dup_u64(%struct.uint64x1x4_t* noalias sret %agg.result, i64* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD_DUP]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint64x1x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint64x1x4_t test_vld4_dup_u64(uint64_t const * a) {
  return vld4_dup_u64(a);
}

// CHECK-LABEL: define void @test_vld4_dup_s8(%struct.int8x8x4_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0
// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1
// CHECK: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP5]], <8 x i32> zeroinitializer
// CHECK: [[TMP6:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], <8 x i8> [[LANE2]], 2
// CHECK: [[TMP7:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], 3
// CHECK: [[LANE3:%.*]] = shufflevector <8 x i8> [[TMP7]], <8 x i8> [[TMP7]], <8 x i32> zeroinitializer
// CHECK: [[TMP8:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], <8 x i8> [[LANE3]], 3
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP8]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP9]]
// CHECK: [[TMP10:%.*]] = bitcast %struct.int8x8x4_t* %agg.result to i8*
// CHECK: [[TMP11:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP10]], i8* [[TMP11]], i32 32, i32 8, i1 false)
// CHECK: ret void
int8x8x4_t test_vld4_dup_s8(int8_t const * a) {
  return vld4_dup_s8(a);
}

// CHECK-LABEL: define void @test_vld4_dup_s16(%struct.int16x4x4_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer
// CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2
// CHECK: [[TMP8:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], 3
// CHECK: [[LANE3:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP8]], <4 x i32> zeroinitializer
// CHECK: [[TMP9:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], <4 x i16> [[LANE3]], 3
// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP9]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP10]]
// CHECK: [[TMP11:%.*]] = bitcast %struct.int16x4x4_t* %agg.result to i8*
// CHECK: [[TMP12:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP11]], i8* [[TMP12]], i32 32, i32 8, i1 false)
// CHECK: ret void
int16x4x4_t test_vld4_dup_s16(int16_t const * a) {
  return vld4_dup_s16(a);
}

// CHECK-LABEL: define void @test_vld4_dup_s32(%struct.int32x2x4_t* noalias sret %agg.result, i32* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* [[TMP1]], <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4)
// CHECK: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], <2 x i32> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP4]], <2 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], <2 x i32> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP6]], <2 x i32> zeroinitializer
// CHECK: [[TMP7:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], <2 x i32> [[LANE2]], 2
// CHECK: [[TMP8:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP7]], 3
// CHECK: [[LANE3:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> [[TMP8]], <2 x i32> zeroinitializer
// CHECK: [[TMP9:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP7]], <2 x i32> [[LANE3]], 3
// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP9]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP10]]
// CHECK: [[TMP11:%.*]] = bitcast %struct.int32x2x4_t* %agg.result to i8*
// CHECK: [[TMP12:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP11]], i8* [[TMP12]], i32 32, i32 8, i1 false)
// CHECK: ret void
int32x2x4_t test_vld4_dup_s32(int32_t const * a) {
  return vld4_dup_s32(a);
}

// CHECK-LABEL: define void @test_vld4_dup_s64(%struct.int64x1x4_t* noalias sret %agg.result, i64* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64.p0i8(i8* [[TMP1]], i32 4)
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD_DUP]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int64x1x4_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false)
// CHECK: ret void
int64x1x4_t test_vld4_dup_s64(int64_t const * a) {
  return vld4_dup_s64(a);
}

// CHECK-LABEL: define void @test_vld4_dup_f16(%struct.float16x4x4_t* noalias sret %agg.result, half* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer
// CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2
// CHECK: [[TMP8:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], 3
// CHECK: [[LANE3:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP8]], <4 x i32> zeroinitializer
// CHECK: [[TMP9:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], <4 x i16> [[LANE3]], 3
// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP9]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP10]]
// CHECK: [[TMP11:%.*]] = bitcast %struct.float16x4x4_t* %agg.result to i8*
// CHECK: [[TMP12:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP11]], i8* [[TMP12]], i32 32, i32 8, i1 false)
// CHECK: ret void
float16x4x4_t test_vld4_dup_f16(float16_t const * a) {
  return vld4_dup_f16(a);
}

// CHECK-LABEL: define void @test_vld4_dup_f32(%struct.float32x2x4_t* noalias sret %agg.result, float* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4lane.v2f32.p0i8(i8* [[TMP1]], <2 x float> undef, <2 x float> undef, <2 x float> undef, <2 x float> undef, i32 0, i32 4)
// CHECK: [[TMP2:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <2 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD_DUP]], <2 x float> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP4]], <2 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP3]], <2 x float> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP5]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP6]], <2 x i32> zeroinitializer
// CHECK: [[TMP7:%.*]] = insertvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP5]], <2 x float> [[LANE2]], 2
// CHECK: [[TMP8:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP7]], 3
// CHECK: [[LANE3:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP8]], <2 x i32> zeroinitializer
// CHECK: [[TMP9:%.*]] = insertvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP7]], <2 x float> [[LANE3]], 3
// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP9]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP10]]
// CHECK: [[TMP11:%.*]] = bitcast %struct.float32x2x4_t* %agg.result to i8*
// CHECK: [[TMP12:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP11]], i8* [[TMP12]], i32 32, i32 8, i1 false)
// CHECK: ret void
float32x2x4_t test_vld4_dup_f32(float32_t const * a) {
  return vld4_dup_f32(a);
}

// CHECK-LABEL: define void @test_vld4_dup_p8(%struct.poly8x8x4_t* noalias sret %agg.result, i8* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0
// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1
// CHECK: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP5]], <8 x i32> zeroinitializer
// CHECK: [[TMP6:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], <8 x i8> [[LANE2]], 2
// CHECK: [[TMP7:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], 3
// CHECK: [[LANE3:%.*]] = shufflevector <8 x i8> [[TMP7]], <8 x i8> [[TMP7]], <8 x i32> zeroinitializer
// CHECK: [[TMP8:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], <8 x i8> [[LANE3]], 3
// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP8]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP9]]
// CHECK: [[TMP10:%.*]] = bitcast %struct.poly8x8x4_t* %agg.result to i8*
// CHECK: [[TMP11:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP10]], i8* [[TMP11]], i32 32, i32 8, i1 false)
// CHECK: ret void
poly8x8x4_t test_vld4_dup_p8(poly8_t const * a) {
  return vld4_dup_p8(a);
}

// CHECK-LABEL: define void @test_vld4_dup_p16(%struct.poly16x4x4_t* noalias sret %agg.result, i16* %a) #0 {
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer
// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0
// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1
// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer
// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1
// CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2
// CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer
// CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2
// CHECK: [[TMP8:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], 3
// CHECK: [[LANE3:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP8]], <4 x i32> zeroinitializer
// CHECK: [[TMP9:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], <4 x i16> [[LANE3]], 3
// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP9]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP10]]
// CHECK: [[TMP11:%.*]] = bitcast %struct.poly16x4x4_t* %agg.result to i8*
// CHECK: [[TMP12:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP11]], i8* [[TMP12]], i32 32, i32 8, i1 false)
// CHECK: ret void
poly16x4x4_t test_vld4_dup_p16(poly16_t const * a) {
  return vld4_dup_p16(a);
}

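// Note (added annotation): the vld4q_lane tests below check the
// lane-indexed variant on 128-bit vectors: the four source vectors are
// loaded from the coerced by-value struct, round-tripped through
// <16 x i8>, and passed to @llvm.arm.neon.vld4lane together with the lane
// index and the element alignment.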
7840 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 7841 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0 7842 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 7843 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 7844 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 7845 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1 7846 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 7847 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> 7848 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 7849 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2 7850 // CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 7851 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8> 7852 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 7853 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3 7854 // CHECK: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16 7855 // CHECK: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP11]] to <16 x i8> 7856 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 7857 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> 7858 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32> 7859 // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x i32> 7860 // CHECK: [[VLD4Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32.p0i8(i8* [[TMP4]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], <4 x i32> [[TMP16]], i32 3, i32 4) 7861 // CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* 7862 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_LANE_V]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP17]] 7863 // CHECK: [[TMP18:%.*]] = bitcast %struct.uint32x4x4_t* %agg.result to i8* 7864 // CHECK: [[TMP19:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* 7865 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 64, i32 16, i1 false) 7866 // CHECK: ret void 7867 uint32x4x4_t test_vld4q_lane_u32(uint32_t const * a, uint32x4x4_t b) { 7868 return vld4q_lane_u32(a, b, 3); 7869 } 7870 7871 // CHECK-LABEL: define void @test_vld4q_lane_s16(%struct.int16x8x4_t* noalias sret %agg.result, i16* %a, [8 x i64] %b.coerce) #0 { 7872 // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16 7873 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16 7874 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16 7875 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0 7876 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]* 7877 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 7878 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8* 7879 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8* 7880 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* 
[[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) 7881 // CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* 7882 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* 7883 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 7884 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0 7885 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 7886 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 7887 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 7888 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1 7889 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 7890 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 7891 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 7892 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2 7893 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 7894 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> 7895 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 7896 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3 7897 // CHECK: [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 7898 // CHECK: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8> 7899 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 7900 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 7901 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> 7902 // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16> 7903 // CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], <8 x i16> [[TMP16]], i32 7, i32 2) 7904 // CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* 7905 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP17]] 7906 // CHECK: [[TMP18:%.*]] = bitcast %struct.int16x8x4_t* %agg.result to i8* 7907 // CHECK: [[TMP19:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* 7908 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 64, i32 16, i1 false) 7909 // CHECK: ret void 7910 int16x8x4_t test_vld4q_lane_s16(int16_t const * a, int16x8x4_t b) { 7911 return vld4q_lane_s16(a, b, 7); 7912 } 7913 7914 // CHECK-LABEL: define void @test_vld4q_lane_s32(%struct.int32x4x4_t* noalias sret %agg.result, i32* %a, [8 x i64] %b.coerce) #0 { 7915 // CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16 7916 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16 7917 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16 7918 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0 7919 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]* 7920 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 7921 // CHECK: [[TMP1:%.*]] = bitcast 
%struct.int32x4x4_t* [[__S1]] to i8* 7922 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8* 7923 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) 7924 // CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* 7925 // CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8* 7926 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 7927 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0 7928 // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 7929 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 7930 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 7931 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1 7932 // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 7933 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> 7934 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 7935 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2 7936 // CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 7937 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8> 7938 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 7939 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3 7940 // CHECK: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16 7941 // CHECK: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP11]] to <16 x i8> 7942 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 7943 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> 7944 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32> 7945 // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x i32> 7946 // CHECK: [[VLD4Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32.p0i8(i8* [[TMP4]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], <4 x i32> [[TMP16]], i32 3, i32 4) 7947 // CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* 7948 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_LANE_V]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP17]] 7949 // CHECK: [[TMP18:%.*]] = bitcast %struct.int32x4x4_t* %agg.result to i8* 7950 // CHECK: [[TMP19:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* 7951 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 64, i32 16, i1 false) 7952 // CHECK: ret void 7953 int32x4x4_t test_vld4q_lane_s32(int32_t const * a, int32x4x4_t b) { 7954 return vld4q_lane_s32(a, b, 3); 7955 } 7956 7957 // CHECK-LABEL: define void @test_vld4q_lane_f16(%struct.float16x8x4_t* noalias sret %agg.result, half* %a, [8 x i64] %b.coerce) #0 { 7958 // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16 7959 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16 7960 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16 7961 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0 7962 // CHECK: 
[[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]* 7963 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 7964 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8* 7965 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8* 7966 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) 7967 // CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* 7968 // CHECK: [[TMP4:%.*]] = bitcast half* %a to i8* 7969 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 7970 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0 7971 // CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 7972 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> 7973 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 7974 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i32 0, i32 1 7975 // CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 7976 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8> 7977 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 7978 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i32 0, i32 2 7979 // CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16 7980 // CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8> 7981 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 7982 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3 7983 // CHECK: [[TMP11:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16 7984 // CHECK: [[TMP12:%.*]] = bitcast <8 x half> [[TMP11]] to <16 x i8> 7985 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 7986 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 7987 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> 7988 // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16> 7989 // CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], <8 x i16> [[TMP16]], i32 7, i32 2) 7990 // CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* 7991 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP17]] 7992 // CHECK: [[TMP18:%.*]] = bitcast %struct.float16x8x4_t* %agg.result to i8* 7993 // CHECK: [[TMP19:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* 7994 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 64, i32 16, i1 false) 7995 // CHECK: ret void 7996 float16x8x4_t test_vld4q_lane_f16(float16_t const * a, float16x8x4_t b) { 7997 return vld4q_lane_f16(a, b, 7); 7998 } 7999 8000 // CHECK-LABEL: define void @test_vld4q_lane_f32(%struct.float32x4x4_t* noalias sret %agg.result, float* %a, [8 x i64] %b.coerce) #0 { 8001 // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16 8002 // CHECK: [[__S1:%.*]] = alloca 
%struct.float32x4x4_t, align 16 8003 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16 8004 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0 8005 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]* 8006 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 8007 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8* 8008 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8* 8009 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) 8010 // CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* 8011 // CHECK: [[TMP4:%.*]] = bitcast float* %a to i8* 8012 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 8013 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0 8014 // CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 8015 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> 8016 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 8017 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i32 0, i32 1 8018 // CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 8019 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8> 8020 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 8021 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i32 0, i32 2 8022 // CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16 8023 // CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8> 8024 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 8025 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i32 0, i32 3 8026 // CHECK: [[TMP11:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16 8027 // CHECK: [[TMP12:%.*]] = bitcast <4 x float> [[TMP11]] to <16 x i8> 8028 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> 8029 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float> 8030 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float> 8031 // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x float> 8032 // CHECK: [[VLD4Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4lane.v4f32.p0i8(i8* [[TMP4]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x float> [[TMP15]], <4 x float> [[TMP16]], i32 3, i32 4) 8033 // CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* 8034 // CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4Q_LANE_V]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP17]] 8035 // CHECK: [[TMP18:%.*]] = bitcast %struct.float32x4x4_t* %agg.result to i8* 8036 // CHECK: [[TMP19:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* 8037 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 64, i32 16, i1 false) 8038 // CHECK: ret void 8039 float32x4x4_t test_vld4q_lane_f32(float32_t const * a, 
float32x4x4_t b) { 8040 return vld4q_lane_f32(a, b, 3); 8041 } 8042 8043 // CHECK-LABEL: define void @test_vld4q_lane_p16(%struct.poly16x8x4_t* noalias sret %agg.result, i16* %a, [8 x i64] %b.coerce) #0 { 8044 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16 8045 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16 8046 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16 8047 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0 8048 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]* 8049 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 8050 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8* 8051 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8* 8052 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) 8053 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* 8054 // CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8* 8055 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 8056 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0 8057 // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 8058 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 8059 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 8060 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1 8061 // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 8062 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 8063 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 8064 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2 8065 // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 8066 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> 8067 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 8068 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3 8069 // CHECK: [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 8070 // CHECK: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8> 8071 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 8072 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 8073 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> 8074 // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16> 8075 // CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], <8 x i16> [[TMP16]], i32 7, i32 2) 8076 // CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* 8077 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP17]] 8078 // CHECK: [[TMP18:%.*]] = bitcast %struct.poly16x8x4_t* %agg.result to i8* 8079 // CHECK: [[TMP19:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* 8080 
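// The 64-bit (d-register) vld4_lane variants follow the same pattern, but
// the tuple is coerced as [4 x i64] with 8-byte alignment, and the lane
// index range shrinks with the vector length (0-7 for <8 x i8>, 0-3 for
// <4 x i16>, 0-1 for <2 x i32>).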
// CHECK-LABEL: define void @test_vld4_lane_u8(%struct.uint8x8x4_t* noalias sret %agg.result, i8* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], i32 7, i32 1)
// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE_V]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP8]]
// CHECK: [[TMP9:%.*]] = bitcast %struct.uint8x8x4_t* %agg.result to i8*
// CHECK: [[TMP10:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint8x8x4_t test_vld4_lane_u8(uint8_t const * a, uint8x8x4_t b) {
  return vld4_lane_u8(a, b, 7);
}

// CHECK-LABEL: define void @test_vld4_lane_u16(%struct.uint16x4x4_t* noalias sret %agg.result, i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to <8 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16>
// CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], <4 x i16> [[TMP16]], i32 3, i32 2)
// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP17]]
// CHECK: [[TMP18:%.*]] = bitcast %struct.uint16x4x4_t* %agg.result to i8*
// CHECK: [[TMP19:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint16x4x4_t test_vld4_lane_u16(uint16_t const * a, uint16x4x4_t b) {
  return vld4_lane_u16(a, b, 3);
}

// CHECK-LABEL: define void @test_vld4_lane_u32(%struct.uint32x2x4_t* noalias sret %agg.result, i32* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <8 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x i32>
// CHECK: [[VLD4_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* [[TMP4]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], <2 x i32> [[TMP16]], i32 1, i32 4)
// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE_V]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP17]]
// CHECK: [[TMP18:%.*]] = bitcast %struct.uint32x2x4_t* %agg.result to i8*
// CHECK: [[TMP19:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 32, i32 8, i1 false)
// CHECK: ret void
uint32x2x4_t test_vld4_lane_u32(uint32_t const * a, uint32x2x4_t b) {
  return vld4_lane_u32(a, b, 1);
}

// CHECK-LABEL: define void @test_vld4_lane_s8(%struct.int8x8x4_t* noalias sret %agg.result, i8* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], i32 7, i32 1)
// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE_V]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP8]]
// CHECK: [[TMP9:%.*]] = bitcast %struct.int8x8x4_t* %agg.result to i8*
// CHECK: [[TMP10:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 32, i32 8, i1 false)
// CHECK: ret void
int8x8x4_t test_vld4_lane_s8(int8_t const * a, int8x8x4_t b) {
  return vld4_lane_s8(a, b, 7);
}

// CHECK-LABEL: define void @test_vld4_lane_s16(%struct.int16x4x4_t* noalias sret %agg.result, i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to <8 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16>
// CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], <4 x i16> [[TMP16]], i32 3, i32 2)
// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP17]]
// CHECK: [[TMP18:%.*]] = bitcast %struct.int16x4x4_t* %agg.result to i8*
// CHECK: [[TMP19:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 32, i32 8, i1 false)
// CHECK: ret void
int16x4x4_t test_vld4_lane_s16(int16_t const * a, int16x4x4_t b) {
  return vld4_lane_s16(a, b, 3);
}

// CHECK-LABEL: define void @test_vld4_lane_s32(%struct.int32x2x4_t* noalias sret %agg.result, i32* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <8 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x i32>
// CHECK: [[VLD4_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* [[TMP4]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], <2 x i32> [[TMP16]], i32 1, i32 4)
// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE_V]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP17]]
// CHECK: [[TMP18:%.*]] = bitcast %struct.int32x2x4_t* %agg.result to i8*
// CHECK: [[TMP19:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 32, i32 8, i1 false)
// CHECK: ret void
int32x2x4_t test_vld4_lane_s32(int32_t const * a, int32x2x4_t b) {
  return vld4_lane_s32(a, b, 1);
}

// CHECK-LABEL: define void @test_vld4_lane_f16(%struct.float16x4x4_t* noalias sret %agg.result, half* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x half>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <4 x half> [[TMP11]] to <8 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16>
// CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], <4 x i16> [[TMP16]], i32 3, i32 2)
// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP17]]
// CHECK: [[TMP18:%.*]] = bitcast %struct.float16x4x4_t* %agg.result to i8*
// CHECK: [[TMP19:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 32, i32 8, i1 false)
// CHECK: ret void
float16x4x4_t test_vld4_lane_f16(float16_t const * a, float16x4x4_t b) {
  return vld4_lane_f16(a, b, 3);
}

// CHECK-LABEL: define void @test_vld4_lane_f32(%struct.float32x2x4_t* noalias sret %agg.result, float* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x float>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <2 x float> [[TMP11]] to <8 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x float>
// CHECK: [[VLD4_LANE_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4lane.v2f32.p0i8(i8* [[TMP4]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], <2 x float> [[TMP15]], <2 x float> [[TMP16]], i32 1, i32 4)
// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }*
// CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4_LANE_V]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP17]]
// CHECK: [[TMP18:%.*]] = bitcast %struct.float32x2x4_t* %agg.result to i8*
// CHECK: [[TMP19:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 32, i32 8, i1 false)
// CHECK: ret void
float32x2x4_t test_vld4_lane_f32(float32_t const * a, float32x2x4_t b) {
  return vld4_lane_f32(a, b, 1);
}

// CHECK-LABEL: define void @test_vld4_lane_p8(%struct.poly8x8x4_t* noalias sret %agg.result, i8* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], i32 7, i32 1)
// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE_V]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP8]]
// CHECK: [[TMP9:%.*]] = bitcast %struct.poly8x8x4_t* %agg.result to i8*
// CHECK: [[TMP10:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 32, i32 8, i1 false)
// CHECK: ret void
poly8x8x4_t test_vld4_lane_p8(poly8_t const * a, poly8x8x4_t b) {
  return vld4_lane_p8(a, b, 7);
}

// CHECK-LABEL: define void @test_vld4_lane_p16(%struct.poly16x4x4_t* noalias sret %agg.result, i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK: [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to <8 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16>
// CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], <4 x i16> [[TMP16]], i32 3, i32 2)
// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP17]]
// CHECK: [[TMP18:%.*]] = bitcast %struct.poly16x4x4_t* %agg.result to i8*
// CHECK: [[TMP19:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 32, i32 8, i1 false)
// CHECK: ret void
poly16x4x4_t test_vld4_lane_p16(poly16_t const * a, poly16x4x4_t b) {
  return vld4_lane_p16(a, b, 3);
}

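// vmax: elementwise maximum. Element types other than i8 are round-tripped
// through <8 x i8>/<16 x i8> bitcasts around the @llvm.arm.neon.vmaxs/vmaxu
// call, as with the other NEON builtins in this file.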
[[VMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %a, <8 x i8> %b) #4 8492 // CHECK: ret <8 x i8> [[VMAX_V_I]] 8493 int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) { 8494 return vmax_s8(a, b); 8495 } 8496 8497 // CHECK-LABEL: define <4 x i16> @test_vmax_s16(<4 x i16> %a, <4 x i16> %b) #0 { 8498 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 8499 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 8500 // CHECK: [[VMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 8501 // CHECK: [[VMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 8502 // CHECK: [[VMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> [[VMAX_V_I]], <4 x i16> [[VMAX_V1_I]]) #4 8503 // CHECK: [[VMAX_V3_I:%.*]] = bitcast <4 x i16> [[VMAX_V2_I]] to <8 x i8> 8504 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMAX_V3_I]] to <4 x i16> 8505 // CHECK: ret <4 x i16> [[TMP2]] 8506 int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) { 8507 return vmax_s16(a, b); 8508 } 8509 8510 // CHECK-LABEL: define <2 x i32> @test_vmax_s32(<2 x i32> %a, <2 x i32> %b) #0 { 8511 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 8512 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 8513 // CHECK: [[VMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 8514 // CHECK: [[VMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 8515 // CHECK: [[VMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> [[VMAX_V_I]], <2 x i32> [[VMAX_V1_I]]) #4 8516 // CHECK: [[VMAX_V3_I:%.*]] = bitcast <2 x i32> [[VMAX_V2_I]] to <8 x i8> 8517 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMAX_V3_I]] to <2 x i32> 8518 // CHECK: ret <2 x i32> [[TMP2]] 8519 int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) { 8520 return vmax_s32(a, b); 8521 } 8522 8523 // CHECK-LABEL: define <8 x i8> @test_vmax_u8(<8 x i8> %a, <8 x i8> %b) #0 { 8524 // CHECK: [[VMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 8525 // CHECK: ret <8 x i8> [[VMAX_V_I]] 8526 uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) { 8527 return vmax_u8(a, b); 8528 } 8529 8530 // CHECK-LABEL: define <4 x i16> @test_vmax_u16(<4 x i16> %a, <4 x i16> %b) #0 { 8531 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 8532 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 8533 // CHECK: [[VMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 8534 // CHECK: [[VMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 8535 // CHECK: [[VMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> [[VMAX_V_I]], <4 x i16> [[VMAX_V1_I]]) #4 8536 // CHECK: [[VMAX_V3_I:%.*]] = bitcast <4 x i16> [[VMAX_V2_I]] to <8 x i8> 8537 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMAX_V3_I]] to <4 x i16> 8538 // CHECK: ret <4 x i16> [[TMP2]] 8539 uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) { 8540 return vmax_u16(a, b); 8541 } 8542 8543 // CHECK-LABEL: define <2 x i32> @test_vmax_u32(<2 x i32> %a, <2 x i32> %b) #0 { 8544 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 8545 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 8546 // CHECK: [[VMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 8547 // CHECK: [[VMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 8548 // CHECK: [[VMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> [[VMAX_V_I]], <2 x i32> [[VMAX_V1_I]]) #4 8549 // CHECK: [[VMAX_V3_I:%.*]] = bitcast <2 x i32> [[VMAX_V2_I]] to <8 x i8> 8550 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMAX_V3_I]] to <2 x i32> 8551 // CHECK: ret <2 x i32> [[TMP2]] 
// CHECK-LABEL: define <16 x i8> @test_vmaxq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VMAXQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VMAXQ_V_I]]
int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) {
  return vmaxq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vmaxq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMAXQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VMAXQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VMAXQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> [[VMAXQ_V_I]], <8 x i16> [[VMAXQ_V1_I]]) #4
// CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <8 x i16> [[VMAXQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMAXQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) {
  return vmaxq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vmaxq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMAXQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VMAXQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> [[VMAXQ_V_I]], <4 x i32> [[VMAXQ_V1_I]]) #4
// CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <4 x i32> [[VMAXQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMAXQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) {
  return vmaxq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vmaxq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VMAXQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VMAXQ_V_I]]
uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) {
  return vmaxq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vmaxq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMAXQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VMAXQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VMAXQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> [[VMAXQ_V_I]], <8 x i16> [[VMAXQ_V1_I]]) #4
// CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <8 x i16> [[VMAXQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMAXQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) {
  return vmaxq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vmaxq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMAXQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VMAXQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> [[VMAXQ_V_I]], <4 x i32> [[VMAXQ_V1_I]]) #4
// CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <4 x i32> [[VMAXQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMAXQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) {
  return vmaxq_u32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vmaxq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMAXQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VMAXQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> [[VMAXQ_V_I]], <4 x float> [[VMAXQ_V1_I]]) #4
// CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <4 x float> [[VMAXQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMAXQ_V3_I]] to <4 x float>
// CHECK: ret <4 x float> [[TMP2]]
float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) {
  return vmaxq_f32(a, b);
}
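// The q-suffixed forms above are the 128-bit (quad-register) counterparts
// of the 64-bit vmax tests: the same per-lane maximum over twice as many
// lanes. Sketch (helper name ours, not part of the verified tests):
static int8x16_t example_vmaxq_usage(int8x16_t a, int8x16_t b) {
  // Sixteen lanes compared at once instead of eight.
  return vmaxq_s8(a, b);
}
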
// CHECK-LABEL: define <8 x i8> @test_vmin_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VMIN_V_I]]
int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) {
  return vmin_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmin_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> [[VMIN_V_I]], <4 x i16> [[VMIN_V1_I]]) #4
// CHECK: [[VMIN_V3_I:%.*]] = bitcast <4 x i16> [[VMIN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMIN_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) {
  return vmin_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmin_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> [[VMIN_V_I]], <2 x i32> [[VMIN_V1_I]]) #4
// CHECK: [[VMIN_V3_I:%.*]] = bitcast <2 x i32> [[VMIN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMIN_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) {
  return vmin_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vmin_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VMIN_V_I]]
uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) {
  return vmin_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmin_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> [[VMIN_V_I]], <4 x i16> [[VMIN_V1_I]]) #4
// CHECK: [[VMIN_V3_I:%.*]] = bitcast <4 x i16> [[VMIN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMIN_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) {
  return vmin_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmin_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> [[VMIN_V_I]], <2 x i32> [[VMIN_V1_I]]) #4
// CHECK: [[VMIN_V3_I:%.*]] = bitcast <2 x i32> [[VMIN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMIN_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) {
  return vmin_u32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vmin_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VMIN_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> [[VMIN_V_I]], <2 x float> [[VMIN_V1_I]]) #4
// CHECK: [[VMIN_V3_I:%.*]] = bitcast <2 x float> [[VMIN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMIN_V3_I]] to <2 x float>
// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) {
  return vmin_f32(a, b);
}
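// vmin mirrors vmax with the smaller element per lane: r[i] = a[i] < b[i]
// ? a[i] : b[i]. Sketch (helper name ours, not part of the verified tests):
static float32x2_t example_vmin_usage(float32x2_t a, float32x2_t b) {
  // Lane-wise minimum of two two-lane float vectors.
  return vmin_f32(a, b);
}
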
// CHECK-LABEL: define <16 x i8> @test_vminq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VMINQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VMINQ_V_I]]
int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) {
  return vminq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vminq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMINQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VMINQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VMINQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> [[VMINQ_V_I]], <8 x i16> [[VMINQ_V1_I]]) #4
// CHECK: [[VMINQ_V3_I:%.*]] = bitcast <8 x i16> [[VMINQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMINQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) {
  return vminq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vminq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMINQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VMINQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VMINQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> [[VMINQ_V_I]], <4 x i32> [[VMINQ_V1_I]]) #4
// CHECK: [[VMINQ_V3_I:%.*]] = bitcast <4 x i32> [[VMINQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMINQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) {
  return vminq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vminq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VMINQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VMINQ_V_I]]
uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) {
  return vminq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vminq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMINQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VMINQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VMINQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> [[VMINQ_V_I]], <8 x i16> [[VMINQ_V1_I]]) #4
// CHECK: [[VMINQ_V3_I:%.*]] = bitcast <8 x i16> [[VMINQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMINQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) {
  return vminq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vminq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMINQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VMINQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VMINQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> [[VMINQ_V_I]], <4 x i32> [[VMINQ_V1_I]]) #4
// CHECK: [[VMINQ_V3_I:%.*]] = bitcast <4 x i32> [[VMINQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMINQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) {
  return vminq_u32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vminq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMINQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VMINQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VMINQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> [[VMINQ_V_I]], <4 x float> [[VMINQ_V1_I]]) #4
// CHECK: [[VMINQ_V3_I:%.*]] = bitcast <4 x float> [[VMINQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMINQ_V3_I]] to <4 x float>
// CHECK: ret <4 x float> [[TMP2]]
float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) {
  return vminq_f32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vmla_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[MUL_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vmla_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
  return vmla_s8(a, b, c);
}

// CHECK-LABEL: define <4 x i16> @test_vmla_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vmla_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vmla_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vmla_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vmla_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vmla_s32(a, b, c);
}

// CHECK-LABEL: define <2 x float> @test_vmla_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, %c
// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %a, [[MUL_I]]
// CHECK: ret <2 x float> [[ADD_I]]
float32x2_t test_vmla_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vmla_f32(a, b, c);
}

// CHECK-LABEL: define <8 x i8> @test_vmla_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[MUL_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
uint8x8_t test_vmla_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmla_u8(a, b, c);
}

// CHECK-LABEL: define <4 x i16> @test_vmla_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vmla_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmla_u16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vmla_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vmla_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmla_u32(a, b, c);
}
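// vmla is a multiply-accumulate: r[i] = a[i] + b[i] * c[i], which is why
// the checks above expect a plain mul followed by add rather than an
// intrinsic call. Sketch (helper name ours, not part of the verified tests):
static int32x2_t example_vmla_usage(int32x2_t acc, int32x2_t b, int32x2_t c) {
  // Equivalent to acc + b * c, lane by lane.
  return vmla_s32(acc, b, c);
}
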
// CHECK-LABEL: define <16 x i8> @test_vmlaq_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[MUL_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
int8x16_t test_vmlaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
  return vmlaq_s8(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlaq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
  return vmlaq_s16(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlaq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
  return vmlaq_s32(a, b, c);
}

// CHECK-LABEL: define <4 x float> @test_vmlaq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, %c
// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, [[MUL_I]]
// CHECK: ret <4 x float> [[ADD_I]]
float32x4_t test_vmlaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
  return vmlaq_f32(a, b, c);
}

// CHECK-LABEL: define <16 x i8> @test_vmlaq_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[MUL_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
uint8x16_t test_vmlaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  return vmlaq_u8(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlaq_u16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
  return vmlaq_u16(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlaq_u32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
  return vmlaq_u32(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlal_s8(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlal_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlal_s32(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlal_u8(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlal_u16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlal_u32(a, b, c);
}
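// vmlal widens before accumulating: each product of two n-bit lanes is
// kept at 2n bits, so the accumulator has double-width lanes (e.g. 4 x i16
// inputs feed a 4 x i32 accumulator). Sketch (helper name ours, not part
// of the verified tests):
static int32x4_t example_vmlal_usage(int32x4_t acc, int16x4_t b, int16x4_t c) {
  // acc[i] += (int32_t)b[i] * (int32_t)c[i], without intermediate overflow.
  return vmlal_s16(acc, b, c);
}
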
// CHECK-LABEL: define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
// CHECK: ret <4 x i32> [[ADD]]
int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlal_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
// CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
// CHECK: ret <2 x i64> [[ADD]]
int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlal_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]]
// CHECK: ret <4 x i32> [[ADD]]
uint32x4_t test_vmlal_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlal_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
// CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]]
// CHECK: ret <2 x i64> [[ADD]]
uint64x2_t test_vmlal_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlal_lane_u32(a, b, c, 1);
}
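// The _lane variants multiply every lane of b by one selected lane of c;
// the shufflevector with a constant splat mask in the checks above is that
// broadcast. Sketch (helper name ours, not part of the verified tests):
static int32x4_t example_vmlal_lane_usage(int32x4_t acc, int16x4_t b,
                                          int16x4_t c) {
  // acc[i] += (int32_t)b[i] * (int32_t)c[3] for every i.
  return vmlal_lane_s16(acc, b, c, 3);
}
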
// CHECK-LABEL: define <4 x i32> @test_vmlal_n_s16(<4 x i32> %a, <4 x i16> %b, i16 signext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
  return vmlal_n_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vmlal_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
  return vmlal_n_s32(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlal_n_u16(<4 x i32> %a, <4 x i16> %b, i16 zeroext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlal_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) {
  return vmlal_n_u16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vmlal_n_u32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vmlal_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) {
  return vmlal_n_u32(a, b, c);
}
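// The _n variants take a scalar and splat it, which is the insertelement
// chain in the checks above: the scalar is written into each lane in turn
// before the widening multiply. Sketch (helper name ours, not part of the
// verified tests):
static int32x4_t example_vmlal_n_usage(int32x4_t acc, int16x4_t b, int16_t c) {
  // acc[i] += (int32_t)b[i] * (int32_t)c for every i.
  return vmlal_n_s16(acc, b, c);
}
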
// CHECK-LABEL: define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]]
// CHECK: ret <4 x i16> [[ADD]]
int16x4_t test_vmla_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vmla_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]]
// CHECK: ret <2 x i32> [[ADD]]
int32x2_t test_vmla_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vmla_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vmla_lane_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]]
// CHECK: ret <4 x i16> [[ADD]]
uint16x4_t test_vmla_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmla_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vmla_lane_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]]
// CHECK: ret <2 x i32> [[ADD]]
uint32x2_t test_vmla_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmla_lane_u32(a, b, c, 1);
}

// CHECK-LABEL: define <2 x float> @test_vmla_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %c, <2 x float> %c, <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
// CHECK: ret <2 x float> [[ADD]]
float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vmla_lane_f32(a, b, c, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]]
// CHECK: ret <8 x i16> [[ADD]]
int16x8_t test_vmlaq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
  return vmlaq_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]]
// CHECK: ret <4 x i32> [[ADD]]
int32x4_t test_vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
  return vmlaq_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vmlaq_lane_u16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]]
// CHECK: ret <8 x i16> [[ADD]]
uint16x8_t test_vmlaq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t c) {
  return vmlaq_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vmlaq_lane_u32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]]
// CHECK: ret <4 x i32> [[ADD]]
uint32x4_t test_vmlaq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t c) {
  return vmlaq_lane_u32(a, b, c, 1);
}

// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %c, <2 x float> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
// CHECK: ret <4 x float> [[ADD]]
float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t c) {
  return vmlaq_lane_f32(a, b, c, 1);
}


// CHECK-LABEL: define <4 x i16> @test_vmla_n_s16(<4 x i16> %a, <4 x i16> %b, i16 signext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vmla_n_s16(int16x4_t a, int16x4_t b, int16_t c) {
  return vmla_n_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vmla_n_s32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vmla_n_s32(int32x2_t a, int32x2_t b, int32_t c) {
  return vmla_n_s32(a, b, c);
}

// CHECK-LABEL: define <4 x i16> @test_vmla_n_u16(<4 x i16> %a, <4 x i16> %b, i16 zeroext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vmla_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) {
  return vmla_n_u16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vmla_n_u32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vmla_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) {
  return vmla_n_u32(a, b, c);
}

// CHECK-LABEL: define <2 x float> @test_vmla_n_f32(<2 x float> %a, <2 x float> %b, float %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]]
// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %a, [[MUL_I]]
// CHECK: ret <2 x float> [[ADD_I]]
float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
  return vmla_n_f32(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlaq_n_s16(<8 x i16> %a, <8 x i16> %b, i16 signext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
  return vmlaq_n_s16(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlaq_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
  return vmlaq_n_s32(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlaq_n_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlaq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
  return vmlaq_n_u16(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlaq_n_u32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
  return vmlaq_n_u32(a, b, c);
}

// CHECK-LABEL: define <4 x float> @test_vmlaq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]]
// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, [[MUL_I]]
// CHECK: ret <4 x float> [[ADD_I]]
float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
  return vmlaq_n_f32(a, b, c);
}


// CHECK-LABEL: define <8 x i8> @test_vmls_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, [[MUL_I]]
// CHECK: ret <8 x i8> [[SUB_I]]
int8x8_t test_vmls_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
  return vmls_s8(a, b, c);
}

// CHECK-LABEL: define <4 x i16> @test_vmls_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[SUB_I]]
int16x4_t test_vmls_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vmls_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vmls_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[SUB_I]]
int32x2_t test_vmls_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vmls_s32(a, b, c);
}

// CHECK-LABEL: define <2 x float> @test_vmls_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, %c
// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %a, [[MUL_I]]
// CHECK: ret <2 x float> [[SUB_I]]
float32x2_t test_vmls_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vmls_f32(a, b, c);
}

// CHECK-LABEL: define <8 x i8> @test_vmls_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, [[MUL_I]]
// CHECK: ret <8 x i8> [[SUB_I]]
uint8x8_t test_vmls_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmls_u8(a, b, c);
}

// CHECK-LABEL: define <4 x i16> @test_vmls_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[SUB_I]]
uint16x4_t test_vmls_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmls_u16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vmls_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[SUB_I]]
uint32x2_t test_vmls_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmls_u32(a, b, c);
}
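// vmls is the subtracting counterpart of vmla: r[i] = a[i] - b[i] * c[i],
// hence mul followed by sub in the checks above. Sketch (helper name ours,
// not part of the verified tests):
static int32x2_t example_vmls_usage(int32x2_t acc, int32x2_t b, int32x2_t c) {
  // Equivalent to acc - b * c, lane by lane.
  return vmls_s32(acc, b, c);
}
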
// CHECK-LABEL: define <16 x i8> @test_vmlsq_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, [[MUL_I]]
// CHECK: ret <16 x i8> [[SUB_I]]
int8x16_t test_vmlsq_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
  return vmlsq_s8(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlsq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
  return vmlsq_s16(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
  return vmlsq_s32(a, b, c);
}

// CHECK-LABEL: define <4 x float> @test_vmlsq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, %c
// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %a, [[MUL_I]]
// CHECK: ret <4 x float> [[SUB_I]]
float32x4_t test_vmlsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
  return vmlsq_f32(a, b, c);
}

// CHECK-LABEL: define <16 x i8> @test_vmlsq_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, [[MUL_I]]
// CHECK: ret <16 x i8> [[SUB_I]]
uint8x16_t test_vmlsq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  return vmlsq_u8(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlsq_u16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
  return vmlsq_u16(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsq_u32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
  return vmlsq_u32(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlsl_s8(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlsl_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlsl_s32(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c) #4
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlsl_u8(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlsl_u16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlsl_u32(a, b, c);
}
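// vmlsl combines the widening multiply of vmlal with a subtraction from
// the double-width accumulator. Sketch (helper name ours, not part of the
// verified tests):
static int64x2_t example_vmlsl_usage(int64x2_t acc, int32x2_t b, int32x2_t c) {
  // acc[i] -= (int64_t)b[i] * (int64_t)c[i].
  return vmlsl_s32(acc, b, c);
}
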
// CHECK-LABEL: define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
// CHECK: ret <4 x i32> [[SUB]]
int32x4_t test_vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlsl_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
// CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
// CHECK: ret <2 x i64> [[SUB]]
int64x2_t test_vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlsl_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]]
// CHECK: ret <4 x i32> [[SUB]]
uint32x4_t test_vmlsl_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlsl_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
// CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]]
// CHECK: ret <2 x i64> [[SUB]]
uint64x2_t test_vmlsl_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlsl_lane_u32(a, b, c, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsl_n_s16(<4 x i32> %a, <4 x i16> %b, i16 signext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
  return vmlsl_n_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vmlsl_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
  return vmlsl_n_s32(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsl_n_u16(<4 x i32> %a, <4 x i16> %b, i16 zeroext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsl_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) {
  return vmlsl_n_u16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vmlsl_n_u32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vmlsl_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) {
  return vmlsl_n_u32(a, b, c);
}

// CHECK-LABEL: define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]]
// CHECK: ret <4 x i16> [[SUB]]
int16x4_t test_vmls_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vmls_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]]
// CHECK: ret <2 x i32> [[SUB]]
int32x2_t test_vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vmls_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vmls_lane_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]]
// CHECK: ret <4 x i16> [[SUB]]
uint16x4_t test_vmls_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmls_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vmls_lane_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]]
// CHECK: ret <2 x i32> [[SUB]]
uint32x2_t test_vmls_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmls_lane_u32(a, b, c, 1);
}

// CHECK-LABEL: define <2 x float> @test_vmls_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %c, <2 x float> %c, <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
// CHECK: ret <2 x float> [[SUB]]
float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vmls_lane_f32(a, b, c, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]]
// CHECK: ret <8 x i16> [[SUB]]
int16x8_t test_vmlsq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
  return vmlsq_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]]
// CHECK: ret <4 x i32> [[SUB]]
int32x4_t test_vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
  return vmlsq_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vmlsq_lane_u16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]]
// CHECK: ret <8 x i16> [[SUB]]
uint16x8_t test_vmlsq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t c) {
  return vmlsq_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsq_lane_u32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]]
// CHECK: ret <4 x i32> [[SUB]]
uint32x4_t test_vmlsq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t c) {
  return vmlsq_lane_u32(a, b, c, 1);
}

// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %c, <2 x float> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
// CHECK: ret <4 x float> [[SUB]]
float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t c) {
  return vmlsq_lane_f32(a, b, c, 1);
}

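// vmls_n_*/vmlsq_n_*: the same subtract-from-accumulator pattern, with the
// scalar broadcast via insertelement instead of a lane shuffle.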
// CHECK-LABEL: define <4 x i16> @test_vmls_n_s16(<4 x i16> %a, <4 x i16> %b, i16 signext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[SUB_I]]
int16x4_t test_vmls_n_s16(int16x4_t a, int16x4_t b, int16_t c) {
  return vmls_n_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vmls_n_s32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[SUB_I]]
int32x2_t test_vmls_n_s32(int32x2_t a, int32x2_t b, int32_t c) {
  return vmls_n_s32(a, b, c);
}

// CHECK-LABEL: define <4 x i16> @test_vmls_n_u16(<4 x i16> %a, <4 x i16> %b, i16 zeroext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[SUB_I]]
uint16x4_t test_vmls_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) {
  return vmls_n_u16(a, b, c);
}

// CHECK-LABEL: define <2 x i32> @test_vmls_n_u32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[SUB_I]]
uint32x2_t test_vmls_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) {
  return vmls_n_u32(a, b, c);
}

// CHECK-LABEL: define <2 x float> @test_vmls_n_f32(<2 x float> %a, <2 x float> %b, float %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]]
// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %a, [[MUL_I]]
// CHECK: ret <2 x float> [[SUB_I]]
float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
  return vmls_n_f32(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlsq_n_s16(<8 x i16> %a, <8 x i16> %b, i16 signext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
  return vmlsq_n_s16(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsq_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
  return vmlsq_n_s32(a, b, c);
}

// CHECK-LABEL: define <8 x i16> @test_vmlsq_n_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
  return vmlsq_n_u16(a, b, c);
}

// CHECK-LABEL: define <4 x i32> @test_vmlsq_n_u32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
  return vmlsq_n_u32(a, b, c);
}

// CHECK-LABEL: define <4 x float> @test_vmlsq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]]
// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %a, [[MUL_I]]
// CHECK: ret <4 x float> [[SUB_I]]
float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
  return vmlsq_n_f32(a, b, c);
}

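// vmovl_*: lengthening moves; these lower directly to sext (signed) or
// zext (unsigned) with no intrinsic call.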
// CHECK-LABEL: define <8 x i16> @test_vmovl_s8(<8 x i8> %a) #0 {
// CHECK: [[VMOVL_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I]]
int16x8_t test_vmovl_s8(int8x8_t a) {
  return vmovl_s8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vmovl_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I]]
int32x4_t test_vmovl_s16(int16x4_t a) {
  return vmovl_s16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vmovl_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I]]
int64x2_t test_vmovl_s32(int32x2_t a) {
  return vmovl_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vmovl_u8(<8 x i8> %a) #0 {
// CHECK: [[VMOVL_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I]]
uint16x8_t test_vmovl_u8(uint8x8_t a) {
  return vmovl_u8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vmovl_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMOVL_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I]]
uint32x4_t test_vmovl_u16(uint16x4_t a) {
  return vmovl_u16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vmovl_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMOVL_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I]]
uint64x2_t test_vmovl_u32(uint32x2_t a) {
  return vmovl_u32(a);
}

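// vmovn_*: narrowing moves; each lowers to a plain trunc to the half-width
// element type.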
// CHECK-LABEL: define <8 x i8> @test_vmovn_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VMOVN_I:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
// CHECK: ret <8 x i8> [[VMOVN_I]]
int8x8_t test_vmovn_s16(int16x8_t a) {
  return vmovn_s16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vmovn_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VMOVN_I:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
// CHECK: ret <4 x i16> [[VMOVN_I]]
int16x4_t test_vmovn_s32(int32x4_t a) {
  return vmovn_s32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vmovn_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VMOVN_I:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
// CHECK: ret <2 x i32> [[VMOVN_I]]
int32x2_t test_vmovn_s64(int64x2_t a) {
  return vmovn_s64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vmovn_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VMOVN_I:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
// CHECK: ret <8 x i8> [[VMOVN_I]]
uint8x8_t test_vmovn_u16(uint16x8_t a) {
  return vmovn_u16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vmovn_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VMOVN_I:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
// CHECK: ret <4 x i16> [[VMOVN_I]]
uint16x4_t test_vmovn_u32(uint32x4_t a) {
  return vmovn_u32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vmovn_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VMOVN_I:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
// CHECK: ret <2 x i32> [[VMOVN_I]]
uint32x2_t test_vmovn_u64(uint64x2_t a) {
  return vmovn_u64(a);
}

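// vmov_n_*/vmovq_n_*: duplicate one scalar into every lane through an
// insertelement chain; the f16 variants read the half through a pointer.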
// CHECK-LABEL: define <8 x i8> @test_vmov_n_u8(i8 zeroext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: ret <8 x i8> [[VECINIT7_I]]
uint8x8_t test_vmov_n_u8(uint8_t a) {
  return vmov_n_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vmov_n_u16(i16 zeroext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VECINIT3_I]]
uint16x4_t test_vmov_n_u16(uint16_t a) {
  return vmov_n_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vmov_n_u32(i32 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK: ret <2 x i32> [[VECINIT1_I]]
uint32x2_t test_vmov_n_u32(uint32_t a) {
  return vmov_n_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vmov_n_s8(i8 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: ret <8 x i8> [[VECINIT7_I]]
int8x8_t test_vmov_n_s8(int8_t a) {
  return vmov_n_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vmov_n_s16(i16 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VECINIT3_I]]
int16x4_t test_vmov_n_s16(int16_t a) {
  return vmov_n_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vmov_n_s32(i32 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK: ret <2 x i32> [[VECINIT1_I]]
int32x2_t test_vmov_n_s32(int32_t a) {
  return vmov_n_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vmov_n_p8(i8 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: ret <8 x i8> [[VECINIT7_I]]
poly8x8_t test_vmov_n_p8(poly8_t a) {
  return vmov_n_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vmov_n_p16(i16 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VECINIT3_I]]
poly16x4_t test_vmov_n_p16(poly16_t a) {
  return vmov_n_p16(a);
}

// CHECK-LABEL: define <4 x half> @test_vmov_n_f16(half* %a) #0 {
// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK: [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[TMP0]], i32 0
// CHECK: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1
// CHECK: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[TMP0]], i32 2
// CHECK: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[TMP0]], i32 3
// CHECK: ret <4 x half> [[VECINIT3]]
float16x4_t test_vmov_n_f16(float16_t *a) {
  return vmov_n_f16(*a);
}

// CHECK-LABEL: define <2 x float> @test_vmov_n_f32(float %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %a, i32 1
// CHECK: ret <2 x float> [[VECINIT1_I]]
float32x2_t test_vmov_n_f32(float32_t a) {
  return vmov_n_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vmovq_n_u8(i8 zeroext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK: ret <16 x i8> [[VECINIT15_I]]
uint8x16_t test_vmovq_n_u8(uint8_t a) {
  return vmovq_n_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vmovq_n_u16(i16 zeroext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VECINIT7_I]]
uint16x8_t test_vmovq_n_u16(uint16_t a) {
  return vmovq_n_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vmovq_n_u32(i32 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
// CHECK: ret <4 x i32> [[VECINIT3_I]]
uint32x4_t test_vmovq_n_u32(uint32_t a) {
  return vmovq_n_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vmovq_n_s8(i8 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK: ret <16 x i8> [[VECINIT15_I]]
int8x16_t test_vmovq_n_s8(int8_t a) {
  return vmovq_n_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vmovq_n_s16(i16 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VECINIT7_I]]
int16x8_t test_vmovq_n_s16(int16_t a) {
  return vmovq_n_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vmovq_n_s32(i32 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
// CHECK: ret <4 x i32> [[VECINIT3_I]]
int32x4_t test_vmovq_n_s32(int32_t a) {
  return vmovq_n_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vmovq_n_p8(i8 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK: ret <16 x i8> [[VECINIT15_I]]
poly8x16_t test_vmovq_n_p8(poly8_t a) {
  return vmovq_n_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vmovq_n_p16(i16 signext %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VECINIT7_I]]
poly16x8_t test_vmovq_n_p16(poly16_t a) {
  return vmovq_n_p16(a);
}

// CHECK-LABEL: define <8 x half> @test_vmovq_n_f16(half* %a) #0 {
// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK: [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[TMP0]], i32 0
// CHECK: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP0]], i32 1
// CHECK: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[TMP0]], i32 2
// CHECK: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[TMP0]], i32 3
// CHECK: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[TMP0]], i32 4
// CHECK: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[TMP0]], i32 5
// CHECK: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[TMP0]], i32 6
// CHECK: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[TMP0]], i32 7
// CHECK: ret <8 x half> [[VECINIT7]]
float16x8_t test_vmovq_n_f16(float16_t *a) {
  return vmovq_n_f16(*a);
}

// CHECK-LABEL: define <4 x float> @test_vmovq_n_f32(float %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %a, i32 3
// CHECK: ret <4 x float> [[VECINIT3_I]]
float32x4_t test_vmovq_n_f32(float32_t a) {
  return vmovq_n_f32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vmov_n_s64(i64 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
// CHECK: ret <1 x i64> [[ADD_I]]
int64x1_t test_vmov_n_s64(int64_t a) {
  int64x1_t tmp = vmov_n_s64(a);
  return vadd_s64(tmp, tmp);
}

// CHECK-LABEL: define <1 x i64> @test_vmov_n_u64(i64 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
// CHECK: ret <1 x i64> [[ADD_I]]
uint64x1_t test_vmov_n_u64(uint64_t a) {
  uint64x1_t tmp = vmov_n_u64(a);
  return vadd_u64(tmp, tmp);
}

// CHECK-LABEL: define <2 x i64> @test_vmovq_n_s64(i64 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK: ret <2 x i64> [[VECINIT1_I]]
int64x2_t test_vmovq_n_s64(int64_t a) {
  return vmovq_n_s64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vmovq_n_u64(i64 %a) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK: ret <2 x i64> [[VECINIT1_I]]
uint64x2_t test_vmovq_n_u64(uint64_t a) {
  return vmovq_n_u64(a);
}

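// vmul_*/vmulq_*: lane-wise multiplies that map straight onto the IR
// mul/fmul instructions.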
// CHECK-LABEL: define <8 x i8> @test_vmul_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[MUL_I]]
int8x8_t test_vmul_s8(int8x8_t a, int8x8_t b) {
  return vmul_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmul_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[MUL_I]]
int16x4_t test_vmul_s16(int16x4_t a, int16x4_t b) {
  return vmul_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmul_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[MUL_I]]
int32x2_t test_vmul_s32(int32x2_t a, int32x2_t b) {
  return vmul_s32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vmul_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %a, %b
// CHECK: ret <2 x float> [[MUL_I]]
float32x2_t test_vmul_f32(float32x2_t a, float32x2_t b) {
  return vmul_f32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vmul_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[MUL_I]]
uint8x8_t test_vmul_u8(uint8x8_t a, uint8x8_t b) {
  return vmul_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmul_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[MUL_I]]
uint16x4_t test_vmul_u16(uint16x4_t a, uint16x4_t b) {
  return vmul_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmul_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[MUL_I]]
uint32x2_t test_vmul_u32(uint32x2_t a, uint32x2_t b) {
  return vmul_u32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vmulq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[MUL_I]]
int8x16_t test_vmulq_s8(int8x16_t a, int8x16_t b) {
  return vmulq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vmulq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[MUL_I]]
int16x8_t test_vmulq_s16(int16x8_t a, int16x8_t b) {
  return vmulq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vmulq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[MUL_I]]
int32x4_t test_vmulq_s32(int32x4_t a, int32x4_t b) {
  return vmulq_s32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vmulq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %a, %b
// CHECK: ret <4 x float> [[MUL_I]]
float32x4_t test_vmulq_f32(float32x4_t a, float32x4_t b) {
  return vmulq_f32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vmulq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[MUL_I]]
uint8x16_t test_vmulq_u8(uint8x16_t a, uint8x16_t b) {
  return vmulq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vmulq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[MUL_I]]
uint16x8_t test_vmulq_u16(uint16x8_t a, uint16x8_t b) {
  return vmulq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vmulq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[MUL_I]]
uint32x4_t test_vmulq_u32(uint32x4_t a, uint32x4_t b) {
  return vmulq_u32(a, b);
}

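// vmull_*: widening multiplies, kept as llvm.arm.neon.vmulls/vmullu/vmullp
// intrinsic calls (signed/unsigned/polynomial).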
// CHECK-LABEL: define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i16> [[VMULL_I]]
int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) {
  return vmull_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
// CHECK: ret <4 x i32> [[VMULL2_I]]
int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) {
  return vmull_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
// CHECK: ret <2 x i64> [[VMULL2_I]]
int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) {
  return vmull_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i16> [[VMULL_I]]
uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) {
  return vmull_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
// CHECK: ret <4 x i32> [[VMULL2_I]]
uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) {
  return vmull_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
// CHECK: ret <2 x i64> [[VMULL2_I]]
uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) {
  return vmull_u32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i16> [[VMULL_I]]
poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) {
  return vmull_p8(a, b);
}

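// vmull_lane_*: the lane operand is splatted with a shufflevector before the
// widening multiply.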
// CHECK-LABEL: define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
// CHECK: ret <4 x i32> [[VMULL2_I]]
int32x4_t test_vmull_lane_s16(int16x4_t a, int16x4_t b) {
  return vmull_lane_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
// CHECK: ret <2 x i64> [[VMULL2_I]]
int64x2_t test_vmull_lane_s32(int32x2_t a, int32x2_t b) {
  return vmull_lane_s32(a, b, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4
// CHECK: ret <4 x i32> [[VMULL2_I]]
uint32x4_t test_vmull_lane_u16(uint16x4_t a, uint16x4_t b) {
  return vmull_lane_u16(a, b, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4
// CHECK: ret <2 x i64> [[VMULL2_I]]
uint64x2_t test_vmull_lane_u32(uint32x2_t a, uint32x2_t b) {
  return vmull_lane_u32(a, b, 1);
}

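// vmull_n_*: as above, but the scalar multiplier is broadcast with an
// insertelement chain.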
// CHECK-LABEL: define <4 x i32> @test_vmull_n_s16(<4 x i16> %a, i16 signext %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL4_I]]) #4
// CHECK: ret <4 x i32> [[VMULL5_I]]
int32x4_t test_vmull_n_s16(int16x4_t a, int16_t b) {
  return vmull_n_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vmull_n_s32(<2 x i32> %a, i32 %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL2_I]]) #4
// CHECK: ret <2 x i64> [[VMULL3_I]]
int64x2_t test_vmull_n_s32(int32x2_t a, int32_t b) {
  return vmull_n_s32(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vmull_n_u16(<4 x i16> %a, i16 zeroext %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VMULL4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL4_I]]) #4
// CHECK: ret <4 x i32> [[VMULL5_I]]
uint32x4_t test_vmull_n_u16(uint16x4_t a, uint16_t b) {
  return vmull_n_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vmull_n_u32(<2 x i32> %a, i32 %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VMULL2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL2_I]]) #4
// CHECK: ret <2 x i64> [[VMULL3_I]]
uint64x2_t test_vmull_n_u32(uint32x2_t a, uint32_t b) {
  return vmull_n_u32(a, b);
}

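// vmul_p8/vmulq_p8: polynomial (carry-less) multiply keeps its
// llvm.arm.neon.vmulp intrinsic form.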
// CHECK-LABEL: define <8 x i8> @test_vmul_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VMUL_V_I]]
poly8x8_t test_vmul_p8(poly8x8_t a, poly8x8_t b) {
  return vmul_p8(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vmulq_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VMULQ_V_I]]
poly8x16_t test_vmulq_p8(poly8x16_t a, poly8x16_t b) {
  return vmulq_p8(a, b);
}

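// vmul_lane_*/vmulq_lane_*: multiply by one lane, again a shufflevector
// splat followed by mul/fmul.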
// CHECK-LABEL: define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]]
// CHECK: ret <4 x i16> [[MUL]]
int16x4_t test_vmul_lane_s16(int16x4_t a, int16x4_t b) {
  return vmul_lane_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]]
// CHECK: ret <2 x i32> [[MUL]]
int32x2_t test_vmul_lane_s32(int32x2_t a, int32x2_t b) {
  return vmul_lane_s32(a, b, 1);
}

// CHECK-LABEL: define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %b, <2 x float> %b, <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <2 x float> %a, [[SHUFFLE]]
// CHECK: ret <2 x float> [[MUL]]
float32x2_t test_vmul_lane_f32(float32x2_t a, float32x2_t b) {
  return vmul_lane_f32(a, b, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]]
// CHECK: ret <4 x i16> [[MUL]]
uint16x4_t test_vmul_lane_u16(uint16x4_t a, uint16x4_t b) {
  return vmul_lane_u16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]]
// CHECK: ret <2 x i32> [[MUL]]
uint32x2_t test_vmul_lane_u32(uint32x2_t a, uint32x2_t b) {
  return vmul_lane_u32(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]]
// CHECK: ret <8 x i16> [[MUL]]
int16x8_t test_vmulq_lane_s16(int16x8_t a, int16x4_t b) {
  return vmulq_lane_s16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]]
// CHECK: ret <4 x i32> [[MUL]]
int32x4_t test_vmulq_lane_s32(int32x4_t a, int32x2_t b) {
  return vmulq_lane_s32(a, b, 1);
}

// CHECK-LABEL: define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %b, <2 x float> %b, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <4 x float> %a, [[SHUFFLE]]
// CHECK: ret <4 x float> [[MUL]]
float32x4_t test_vmulq_lane_f32(float32x4_t a, float32x2_t b) {
  return vmulq_lane_f32(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]]
// CHECK: ret <8 x i16> [[MUL]]
uint16x8_t test_vmulq_lane_u16(uint16x8_t a, uint16x4_t b) {
  return vmulq_lane_u16(a, b, 3);
}

// CHECK-LABEL: define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]]
// CHECK: ret <4 x i32> [[MUL]]
uint32x4_t test_vmulq_lane_u32(uint32x4_t a, uint32x2_t b) {
  return vmulq_lane_u32(a, b, 1);
}

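// vmul_n_*/vmulq_n_*: multiply by a scalar broadcast via insertelement.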
// CHECK-LABEL: define <4 x i16> @test_vmul_n_s16(<4 x i16> %a, i16 signext %b) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, [[VECINIT3_I]]
// CHECK: ret <4 x i16> [[MUL_I]]
int16x4_t test_vmul_n_s16(int16x4_t a, int16_t b) {
  return vmul_n_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmul_n_s32(<2 x i32> %a, i32 %b) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]]
// CHECK: ret <2 x i32> [[MUL_I]]
int32x2_t test_vmul_n_s32(int32x2_t a, int32_t b) {
  return vmul_n_s32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %b, i32 1
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %a, [[VECINIT1_I]]
// CHECK: ret <2 x float> [[MUL_I]]
float32x2_t test_vmul_n_f32(float32x2_t a, float32_t b) {
  return vmul_n_f32(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vmul_n_u16(<4 x i16> %a, i16 zeroext %b) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, [[VECINIT3_I]]
// CHECK: ret <4 x i16> [[MUL_I]]
uint16x4_t test_vmul_n_u16(uint16x4_t a, uint16_t b) {
  return vmul_n_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vmul_n_u32(<2 x i32> %a, i32 %b) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]]
// CHECK: ret <2 x i32> [[MUL_I]]
uint32x2_t test_vmul_n_u32(uint32x2_t a, uint32_t b) {
  return vmul_n_u32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vmulq_n_s16(<8 x i16> %a, i16 signext %b) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, [[VECINIT7_I]]
// CHECK: ret <8 x i16> [[MUL_I]]
int16x8_t test_vmulq_n_s16(int16x8_t a, int16_t b) {
  return vmulq_n_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vmulq_n_s32(<4 x i32> %a, i32 %b) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, [[VECINIT3_I]]
// CHECK: ret <4 x i32> [[MUL_I]]
int32x4_t test_vmulq_n_s32(int32x4_t a, int32_t b) {
  return vmulq_n_s32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %b, i32 3
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %a, [[VECINIT3_I]]
// CHECK: ret <4 x float> [[MUL_I]]
float32x4_t test_vmulq_n_f32(float32x4_t a, float32_t b) {
  return vmulq_n_f32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vmulq_n_u16(<8 x i16> %a, i16 zeroext %b) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, [[VECINIT7_I]]
// CHECK: ret <8 x i16> [[MUL_I]]
uint16x8_t test_vmulq_n_u16(uint16x8_t a, uint16_t b) {
  return vmulq_n_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vmulq_n_u32(<4 x i32> %a, i32 %b) #0 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, [[VECINIT3_I]]
// CHECK: ret <4 x i32> [[MUL_I]]
uint32x4_t test_vmulq_n_u32(uint32x4_t a, uint32_t b) {
  return vmulq_n_u32(a, b);
}

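// vmvn_*/vmvnq_*: bitwise NOT, lowered to xor with an all-ones vector.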
// CHECK-LABEL: define <8 x i8> @test_vmvn_s8(<8 x i8> %a) #0 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: ret <8 x i8> [[NEG_I]]
int8x8_t test_vmvn_s8(int8x8_t a) {
  return vmvn_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vmvn_s16(<4 x i16> %a) #0 {
// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: ret <4 x i16> [[NEG_I]]
int16x4_t test_vmvn_s16(int16x4_t a) {
  return vmvn_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vmvn_s32(<2 x i32> %a) #0 {
// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, <i32 -1, i32 -1>
// CHECK: ret <2 x i32> [[NEG_I]]
int32x2_t test_vmvn_s32(int32x2_t a) {
  return vmvn_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vmvn_u8(<8 x i8> %a) #0 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: ret <8 x i8> [[NEG_I]]
uint8x8_t test_vmvn_u8(uint8x8_t a) {
  return vmvn_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vmvn_u16(<4 x i16> %a) #0 {
// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: ret <4 x i16> [[NEG_I]]
uint16x4_t test_vmvn_u16(uint16x4_t a) {
  return vmvn_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vmvn_u32(<2 x i32> %a) #0 {
// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, <i32 -1, i32 -1>
// CHECK: ret <2 x i32> [[NEG_I]]
uint32x2_t test_vmvn_u32(uint32x2_t a) {
  return vmvn_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vmvn_p8(<8 x i8> %a) #0 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: ret <8 x i8> [[NEG_I]]
poly8x8_t test_vmvn_p8(poly8x8_t a) {
  return vmvn_p8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vmvnq_s8(<16 x i8> %a) #0 {
// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: ret <16 x i8> [[NEG_I]]
int8x16_t test_vmvnq_s8(int8x16_t a) {
  return vmvnq_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vmvnq_s16(<8 x i16> %a) #0 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: ret <8 x i16> [[NEG_I]]
int16x8_t test_vmvnq_s16(int16x8_t a) {
  return vmvnq_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vmvnq_s32(<4 x i32> %a) #0 {
// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: ret <4 x i32> [[NEG_I]]
int32x4_t test_vmvnq_s32(int32x4_t a) {
  return vmvnq_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vmvnq_u8(<16 x i8> %a) #0 {
// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: ret <16 x i8> [[NEG_I]]
uint8x16_t test_vmvnq_u8(uint8x16_t a) {
  return vmvnq_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vmvnq_u16(<8 x i16> %a) #0 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: ret <8 x i16> [[NEG_I]]
uint16x8_t test_vmvnq_u16(uint16x8_t a) {
  return vmvnq_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vmvnq_u32(<4 x i32> %a) #0 {
// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: ret <4 x i32> [[NEG_I]]
uint32x4_t test_vmvnq_u32(uint32x4_t a) {
  return vmvnq_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vmvnq_p8(<16 x i8> %a) #0 {
// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: ret <16 x i8> [[NEG_I]]
poly8x16_t test_vmvnq_p8(poly8x16_t a) {
  return vmvnq_p8(a);
}
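
// Note: vneg_*/vnegq_* negates each lane: the integer forms lower to a sub
// from zeroinitializer and the float forms to an fsub from -0.0, the usual
// IR idiom for fneg.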
// CHECK-LABEL: define <8 x i8> @test_vneg_s8(<8 x i8> %a) #0 {
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> zeroinitializer, %a
// CHECK: ret <8 x i8> [[SUB_I]]
int8x8_t test_vneg_s8(int8x8_t a) {
  return vneg_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vneg_s16(<4 x i16> %a) #0 {
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> zeroinitializer, %a
// CHECK: ret <4 x i16> [[SUB_I]]
int16x4_t test_vneg_s16(int16x4_t a) {
  return vneg_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vneg_s32(<2 x i32> %a) #0 {
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> zeroinitializer, %a
// CHECK: ret <2 x i32> [[SUB_I]]
int32x2_t test_vneg_s32(int32x2_t a) {
  return vneg_s32(a);
}

// CHECK-LABEL: define <2 x float> @test_vneg_f32(<2 x float> %a) #0 {
// CHECK: [[SUB_I:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
// CHECK: ret <2 x float> [[SUB_I]]
float32x2_t test_vneg_f32(float32x2_t a) {
  return vneg_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vnegq_s8(<16 x i8> %a) #0 {
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> zeroinitializer, %a
// CHECK: ret <16 x i8> [[SUB_I]]
int8x16_t test_vnegq_s8(int8x16_t a) {
  return vnegq_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vnegq_s16(<8 x i16> %a) #0 {
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> zeroinitializer, %a
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vnegq_s16(int16x8_t a) {
  return vnegq_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vnegq_s32(<4 x i32> %a) #0 {
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, %a
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vnegq_s32(int32x4_t a) {
  return vnegq_s32(a);
}

// CHECK-LABEL: define <4 x float> @test_vnegq_f32(<4 x float> %a) #0 {
// CHECK: [[SUB_I:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
// CHECK: ret <4 x float> [[SUB_I]]
float32x4_t test_vnegq_f32(float32x4_t a) {
  return vnegq_f32(a);
}
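
// Note: vorn_*/vornq_* computes a | ~b, so the checks expect %b xor'ed with
// all-ones followed by an or with %a.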
// CHECK-LABEL: define <8 x i8> @test_vorn_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]]
// CHECK: ret <8 x i8> [[OR_I]]
int8x8_t test_vorn_s8(int8x8_t a, int8x8_t b) {
  return vorn_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vorn_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]]
// CHECK: ret <4 x i16> [[OR_I]]
int16x4_t test_vorn_s16(int16x4_t a, int16x4_t b) {
  return vorn_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vorn_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
// CHECK: [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]]
// CHECK: ret <2 x i32> [[OR_I]]
int32x2_t test_vorn_s32(int32x2_t a, int32x2_t b) {
  return vorn_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vorn_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
// CHECK: [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]]
// CHECK: ret <1 x i64> [[OR_I]]
int64x1_t test_vorn_s64(int64x1_t a, int64x1_t b) {
  return vorn_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vorn_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]]
// CHECK: ret <8 x i8> [[OR_I]]
uint8x8_t test_vorn_u8(uint8x8_t a, uint8x8_t b) {
  return vorn_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vorn_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]]
// CHECK: ret <4 x i16> [[OR_I]]
uint16x4_t test_vorn_u16(uint16x4_t a, uint16x4_t b) {
  return vorn_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vorn_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
// CHECK: [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]]
// CHECK: ret <2 x i32> [[OR_I]]
uint32x2_t test_vorn_u32(uint32x2_t a, uint32x2_t b) {
  return vorn_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vorn_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
// CHECK: [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]]
// CHECK: ret <1 x i64> [[OR_I]]
uint64x1_t test_vorn_u64(uint64x1_t a, uint64x1_t b) {
  return vorn_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vornq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]]
// CHECK: ret <16 x i8> [[OR_I]]
int8x16_t test_vornq_s8(int8x16_t a, int8x16_t b) {
  return vornq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vornq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]]
// CHECK: ret <8 x i16> [[OR_I]]
int16x8_t test_vornq_s16(int16x8_t a, int16x8_t b) {
  return vornq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vornq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]]
// CHECK: ret <4 x i32> [[OR_I]]
int32x4_t test_vornq_s32(int32x4_t a, int32x4_t b) {
  return vornq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vornq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
// CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]]
// CHECK: ret <2 x i64> [[OR_I]]
int64x2_t test_vornq_s64(int64x2_t a, int64x2_t b) {
  return vornq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vornq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]]
// CHECK: ret <16 x i8> [[OR_I]]
uint8x16_t test_vornq_u8(uint8x16_t a, uint8x16_t b) {
  return vornq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vornq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]]
// CHECK: ret <8 x i16> [[OR_I]]
uint16x8_t test_vornq_u16(uint16x8_t a, uint16x8_t b) {
  return vornq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vornq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]]
// CHECK: ret <4 x i32> [[OR_I]]
uint32x4_t test_vornq_u32(uint32x4_t a, uint32x4_t b) {
  return vornq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vornq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
// CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]]
// CHECK: ret <2 x i64> [[OR_I]]
uint64x2_t test_vornq_u64(uint64x2_t a, uint64x2_t b) {
  return vornq_u64(a, b);
}
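
// Note: vorr_*/vorrq_* is plain bitwise OR and maps directly onto the IR
// or instruction for every element type and width.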
// CHECK-LABEL: define <8 x i8> @test_vorr_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[OR_I]]
int8x8_t test_vorr_s8(int8x8_t a, int8x8_t b) {
  return vorr_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vorr_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[OR_I]]
int16x4_t test_vorr_s16(int16x4_t a, int16x4_t b) {
  return vorr_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vorr_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[OR_I]]
int32x2_t test_vorr_s32(int32x2_t a, int32x2_t b) {
  return vorr_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vorr_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <1 x i64> %a, %b
// CHECK: ret <1 x i64> [[OR_I]]
int64x1_t test_vorr_s64(int64x1_t a, int64x1_t b) {
  return vorr_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vorr_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[OR_I]]
uint8x8_t test_vorr_u8(uint8x8_t a, uint8x8_t b) {
  return vorr_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vorr_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[OR_I]]
uint16x4_t test_vorr_u16(uint16x4_t a, uint16x4_t b) {
  return vorr_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vorr_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[OR_I]]
uint32x2_t test_vorr_u32(uint32x2_t a, uint32x2_t b) {
  return vorr_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vorr_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <1 x i64> %a, %b
// CHECK: ret <1 x i64> [[OR_I]]
uint64x1_t test_vorr_u64(uint64x1_t a, uint64x1_t b) {
  return vorr_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vorrq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[OR_I]]
int8x16_t test_vorrq_s8(int8x16_t a, int8x16_t b) {
  return vorrq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vorrq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[OR_I]]
int16x8_t test_vorrq_s16(int16x8_t a, int16x8_t b) {
  return vorrq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vorrq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[OR_I]]
int32x4_t test_vorrq_s32(int32x4_t a, int32x4_t b) {
  return vorrq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vorrq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[OR_I]]
int64x2_t test_vorrq_s64(int64x2_t a, int64x2_t b) {
  return vorrq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vorrq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[OR_I]]
uint8x16_t test_vorrq_u8(uint8x16_t a, uint8x16_t b) {
  return vorrq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vorrq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[OR_I]]
uint16x8_t test_vorrq_u16(uint16x8_t a, uint16x8_t b) {
  return vorrq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vorrq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[OR_I]]
uint32x4_t test_vorrq_u32(uint32x4_t a, uint32x4_t b) {
  return vorrq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vorrq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[OR_I:%.*]] = or <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[OR_I]]
uint64x2_t test_vorrq_u64(uint64x2_t a, uint64x2_t b) {
  return vorrq_u64(a, b);
}
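
// Note: vpadal_*/vpadalq_* (pairwise add and accumulate long) adds adjacent
// element pairs of %b, widens each sum, and accumulates into %a; %b therefore
// has twice as many elements, each half as wide, as the result.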
// CHECK-LABEL: define <4 x i16> @test_vpadal_s8(<4 x i16> %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPADAL_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> [[VPADAL_V_I]], <8 x i8> %b) #4
// CHECK: ret <4 x i16> [[VPADAL_V1_I]]
int16x4_t test_vpadal_s8(int16x4_t a, int8x8_t b) {
  return vpadal_s8(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpadal_s16(<2 x i32> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPADAL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VPADAL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> [[VPADAL_V_I]], <4 x i16> [[VPADAL_V1_I]]) #4
// CHECK: ret <2 x i32> [[VPADAL_V2_I]]
int32x2_t test_vpadal_s16(int32x2_t a, int16x4_t b) {
  return vpadal_s16(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vpadal_s32(<1 x i64> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VPADAL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VPADAL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> [[VPADAL_V_I]], <2 x i32> [[VPADAL_V1_I]]) #4
// CHECK: ret <1 x i64> [[VPADAL_V2_I]]
int64x1_t test_vpadal_s32(int64x1_t a, int32x2_t b) {
  return vpadal_s32(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpadal_u8(<4 x i16> %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPADAL_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> [[VPADAL_V_I]], <8 x i8> %b) #4
// CHECK: ret <4 x i16> [[VPADAL_V1_I]]
uint16x4_t test_vpadal_u8(uint16x4_t a, uint8x8_t b) {
  return vpadal_u8(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpadal_u16(<2 x i32> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPADAL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VPADAL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> [[VPADAL_V_I]], <4 x i16> [[VPADAL_V1_I]]) #4
// CHECK: ret <2 x i32> [[VPADAL_V2_I]]
uint32x2_t test_vpadal_u16(uint32x2_t a, uint16x4_t b) {
  return vpadal_u16(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vpadal_u32(<1 x i64> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VPADAL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VPADAL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> [[VPADAL_V_I]], <2 x i32> [[VPADAL_V1_I]]) #4
// CHECK: ret <1 x i64> [[VPADAL_V2_I]]
uint64x1_t test_vpadal_u32(uint64x1_t a, uint32x2_t b) {
  return vpadal_u32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vpadalq_s8(<8 x i16> %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VPADALQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> [[VPADALQ_V_I]], <16 x i8> %b) #4
// CHECK: ret <8 x i16> [[VPADALQ_V1_I]]
int16x8_t test_vpadalq_s8(int16x8_t a, int8x16_t b) {
  return vpadalq_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vpadalq_s16(<4 x i32> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VPADALQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VPADALQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> [[VPADALQ_V_I]], <8 x i16> [[VPADALQ_V1_I]]) #4
// CHECK: ret <4 x i32> [[VPADALQ_V2_I]]
int32x4_t test_vpadalq_s16(int32x4_t a, int16x8_t b) {
  return vpadalq_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vpadalq_s32(<2 x i64> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VPADALQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VPADALQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> [[VPADALQ_V_I]], <4 x i32> [[VPADALQ_V1_I]]) #4
// CHECK: ret <2 x i64> [[VPADALQ_V2_I]]
int64x2_t test_vpadalq_s32(int64x2_t a, int32x4_t b) {
  return vpadalq_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vpadalq_u8(<8 x i16> %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VPADALQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> [[VPADALQ_V_I]], <16 x i8> %b) #4
// CHECK: ret <8 x i16> [[VPADALQ_V1_I]]
uint16x8_t test_vpadalq_u8(uint16x8_t a, uint8x16_t b) {
  return vpadalq_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vpadalq_u16(<4 x i32> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VPADALQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VPADALQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> [[VPADALQ_V_I]], <8 x i16> [[VPADALQ_V1_I]]) #4
// CHECK: ret <4 x i32> [[VPADALQ_V2_I]]
uint32x4_t test_vpadalq_u16(uint32x4_t a, uint16x8_t b) {
  return vpadalq_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vpadalq_u32(<2 x i64> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VPADALQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VPADALQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> [[VPADALQ_V_I]], <4 x i32> [[VPADALQ_V1_I]]) #4
// CHECK: ret <2 x i64> [[VPADALQ_V2_I]]
uint64x2_t test_vpadalq_u32(uint64x2_t a, uint32x4_t b) {
  return vpadalq_u32(a, b);
}
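
// Note: vpadd_* (pairwise add) adds adjacent element pairs across the
// concatenation of a and b; AArch32 NEON only provides the 64-bit vector
// forms, hence no vpaddq tests here.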
// CHECK-LABEL: define <8 x i8> @test_vpadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VPADD_V_I]]
int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) {
  return vpadd_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> [[VPADD_V_I]], <4 x i16> [[VPADD_V1_I]]) #4
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) {
  return vpadd_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> [[VPADD_V_I]], <2 x i32> [[VPADD_V1_I]]) #4
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) {
  return vpadd_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vpadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VPADD_V_I]]
uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) {
  return vpadd_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> [[VPADD_V_I]], <4 x i16> [[VPADD_V1_I]]) #4
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) {
  return vpadd_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> [[VPADD_V_I]], <2 x i32> [[VPADD_V1_I]]) #4
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) {
  return vpadd_u32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vpadd_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> [[VPADD_V_I]], <2 x float> [[VPADD_V1_I]]) #4
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x float>
// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
  return vpadd_f32(a, b);
}
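
// Note: vpaddl_*/vpaddlq_* (pairwise add long) is the accumulator-less,
// single-operand variant: adjacent pairs are added and widened, e.g.
// <8 x i8> -> <4 x i16>.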
// CHECK-LABEL: define <4 x i16> @test_vpaddl_s8(<8 x i8> %a) #0 {
// CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %a) #4
// CHECK: ret <4 x i16> [[VPADDL_I]]
int16x4_t test_vpaddl_s8(int8x8_t a) {
  return vpaddl_s8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vpaddl_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> [[VPADDL_I]]) #4
// CHECK: ret <2 x i32> [[VPADDL1_I]]
int32x2_t test_vpaddl_s16(int16x4_t a) {
  return vpaddl_s16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vpaddl_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> [[VPADDL_I]]) #4
// CHECK: ret <1 x i64> [[VPADDL1_I]]
int64x1_t test_vpaddl_s32(int32x2_t a) {
  return vpaddl_s32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vpaddl_u8(<8 x i8> %a) #0 {
// CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %a) #4
// CHECK: ret <4 x i16> [[VPADDL_I]]
uint16x4_t test_vpaddl_u8(uint8x8_t a) {
  return vpaddl_u8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vpaddl_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> [[VPADDL_I]]) #4
// CHECK: ret <2 x i32> [[VPADDL1_I]]
uint32x2_t test_vpaddl_u16(uint16x4_t a) {
  return vpaddl_u16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vpaddl_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> [[VPADDL_I]]) #4
// CHECK: ret <1 x i64> [[VPADDL1_I]]
uint64x1_t test_vpaddl_u32(uint32x2_t a) {
  return vpaddl_u32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vpaddlq_s8(<16 x i8> %a) #0 {
// CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %a) #4
// CHECK: ret <8 x i16> [[VPADDL_I]]
int16x8_t test_vpaddlq_s8(int8x16_t a) {
  return vpaddlq_s8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vpaddlq_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> [[VPADDL_I]]) #4
// CHECK: ret <4 x i32> [[VPADDL1_I]]
int32x4_t test_vpaddlq_s16(int16x8_t a) {
  return vpaddlq_s16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vpaddlq_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> [[VPADDL_I]]) #4
// CHECK: ret <2 x i64> [[VPADDL1_I]]
int64x2_t test_vpaddlq_s32(int32x4_t a) {
  return vpaddlq_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vpaddlq_u8(<16 x i8> %a) #0 {
// CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %a) #4
// CHECK: ret <8 x i16> [[VPADDL_I]]
uint16x8_t test_vpaddlq_u8(uint8x16_t a) {
  return vpaddlq_u8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vpaddlq_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> [[VPADDL_I]]) #4
// CHECK: ret <4 x i32> [[VPADDL1_I]]
uint32x4_t test_vpaddlq_u16(uint16x8_t a) {
  return vpaddlq_u16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vpaddlq_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> [[VPADDL_I]]) #4
// CHECK: ret <2 x i64> [[VPADDL1_I]]
uint64x2_t test_vpaddlq_u32(uint32x4_t a) {
  return vpaddlq_u32(a);
}
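
// Note: vpmax_* takes the maximum of each adjacent pair of elements; like
// vpadd, it exists only for 64-bit vectors on AArch32.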
// CHECK-LABEL: define <8 x i8> @test_vpmax_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VPMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VPMAX_V_I]]
int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) {
  return vpmax_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpmax_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VPMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> [[VPMAX_V_I]], <4 x i16> [[VPMAX_V1_I]]) #4
// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <4 x i16> [[VPMAX_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMAX_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) {
  return vpmax_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpmax_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> [[VPMAX_V_I]], <2 x i32> [[VPMAX_V1_I]]) #4
// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <2 x i32> [[VPMAX_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMAX_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) {
  return vpmax_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vpmax_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VPMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VPMAX_V_I]]
uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) {
  return vpmax_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpmax_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VPMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> [[VPMAX_V_I]], <4 x i16> [[VPMAX_V1_I]]) #4
// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <4 x i16> [[VPMAX_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMAX_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) {
  return vpmax_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpmax_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> [[VPMAX_V_I]], <2 x i32> [[VPMAX_V1_I]]) #4
// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <2 x i32> [[VPMAX_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMAX_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) {
  return vpmax_u32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vpmax_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VPMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> [[VPMAX_V_I]], <2 x float> [[VPMAX_V1_I]]) #4
// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <2 x float> [[VPMAX_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMAX_V3_I]] to <2 x float>
// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) {
  return vpmax_f32(a, b);
}
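
// Note: vpmin_* is the pairwise minimum, mirroring the vpmax tests above.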
// CHECK-LABEL: define <8 x i8> @test_vpmin_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VPMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VPMIN_V_I]]
int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) {
  return vpmin_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpmin_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VPMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> [[VPMIN_V_I]], <4 x i16> [[VPMIN_V1_I]]) #4
// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <4 x i16> [[VPMIN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMIN_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) {
  return vpmin_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpmin_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> [[VPMIN_V_I]], <2 x i32> [[VPMIN_V1_I]]) #4
// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <2 x i32> [[VPMIN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMIN_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) {
  return vpmin_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vpmin_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VPMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VPMIN_V_I]]
uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) {
  return vpmin_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpmin_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VPMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VPMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> [[VPMIN_V_I]], <4 x i16> [[VPMIN_V1_I]]) #4
// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <4 x i16> [[VPMIN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMIN_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) {
  return vpmin_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpmin_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VPMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> [[VPMIN_V_I]], <2 x i32> [[VPMIN_V1_I]]) #4
// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <2 x i32> [[VPMIN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMIN_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) {
  return vpmin_u32(a, b);
}

// CHECK-LABEL: define <2 x float> @test_vpmin_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VPMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> [[VPMIN_V_I]], <2 x float> [[VPMIN_V1_I]]) #4
// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <2 x float> [[VPMIN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMIN_V3_I]] to <2 x float>
// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) {
  return vpmin_f32(a, b);
}
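
// Note: vqabs_*/vqabsq_* is a saturating absolute value: unlike a plain
// abs, it maps INT_MIN to INT_MAX instead of wrapping back to INT_MIN.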
// CHECK-LABEL: define <8 x i8> @test_vqabs_s8(<8 x i8> %a) #0 {
// CHECK: [[VQABS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %a) #4
// CHECK: ret <8 x i8> [[VQABS_V_I]]
int8x8_t test_vqabs_s8(int8x8_t a) {
  return vqabs_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vqabs_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQABS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQABS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> [[VQABS_V_I]]) #4
// CHECK: [[VQABS_V2_I:%.*]] = bitcast <4 x i16> [[VQABS_V1_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQABS_V2_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP1]]
int16x4_t test_vqabs_s16(int16x4_t a) {
  return vqabs_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vqabs_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQABS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQABS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> [[VQABS_V_I]]) #4
// CHECK: [[VQABS_V2_I:%.*]] = bitcast <2 x i32> [[VQABS_V1_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQABS_V2_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP1]]
int32x2_t test_vqabs_s32(int32x2_t a) {
  return vqabs_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vqabsq_s8(<16 x i8> %a) #0 {
// CHECK: [[VQABSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %a) #4
// CHECK: ret <16 x i8> [[VQABSQ_V_I]]
int8x16_t test_vqabsq_s8(int8x16_t a) {
  return vqabsq_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vqabsq_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQABSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQABSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> [[VQABSQ_V_I]]) #4
// CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <8 x i16> [[VQABSQ_V1_I]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VQABSQ_V2_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP1]]
int16x8_t test_vqabsq_s16(int16x8_t a) {
  return vqabsq_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vqabsq_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQABSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQABSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> [[VQABSQ_V_I]]) #4
// CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <4 x i32> [[VQABSQ_V1_I]] to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VQABSQ_V2_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP1]]
int32x4_t test_vqabsq_s32(int32x4_t a) {
  return vqabsq_s32(a);
}
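
// Note: vqadd_*/vqaddq_* saturates on overflow instead of wrapping, e.g.
// 100 + 100 yields 127 in an int8x8_t lane and 200 + 200 yields 255 in a
// uint8x8_t lane.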
// CHECK-LABEL: define <8 x i8> @test_vqadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQADD_V_I]]
int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) {
  return vqadd_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> [[VQADD_V_I]], <4 x i16> [[VQADD_V1_I]]) #4
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) {
  return vqadd_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> [[VQADD_V_I]], <2 x i32> [[VQADD_V1_I]]) #4
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) {
  return vqadd_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqadd_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> [[VQADD_V_I]], <1 x i64> [[VQADD_V1_I]]) #4
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) {
  return vqadd_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vqadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQADD_V_I]]
uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) {
  return vqadd_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> [[VQADD_V_I]], <4 x i16> [[VQADD_V1_I]]) #4
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) {
  return vqadd_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> [[VQADD_V_I]], <2 x i32> [[VQADD_V1_I]]) #4
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) {
  return vqadd_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqadd_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> [[VQADD_V_I]], <1 x i64> [[VQADD_V1_I]]) #4
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) {
  return vqadd_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VQADDQ_V_I]]
int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) {
  return vqaddq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> [[VQADDQ_V_I]], <8 x i16> [[VQADDQ_V1_I]]) #4
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) {
  return vqaddq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> [[VQADDQ_V_I]], <4 x i32> [[VQADDQ_V1_I]]) #4
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) {
  return vqaddq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> [[VQADDQ_V_I]], <2 x i64> [[VQADDQ_V1_I]]) #4
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) {
  return vqaddq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VQADDQ_V_I]]
uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vqaddq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> [[VQADDQ_V_I]], <8 x i16> [[VQADDQ_V1_I]]) #4
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vqaddq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> [[VQADDQ_V_I]], <4 x i32> [[VQADDQ_V1_I]]) #4
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vqaddq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> [[VQADDQ_V_I]], <2 x i64> [[VQADDQ_V1_I]]) #4
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vqaddq_u64(a, b);
}
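
// Note: vqdmlal_* (saturating doubling multiply-accumulate long) computes
// a + sat(2 * b * c) in widened lanes; the checks expect vqdmull followed
// by a saturating add (llvm.arm.neon.vqadds).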
// CHECK-LABEL: define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4
// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4
// CHECK: ret <4 x i32> [[VQDMLAL_V3_I]]
int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlal_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4
// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4
// CHECK: ret <2 x i64> [[VQDMLAL_V3_I]]
int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlal_s32(a, b, c);
}
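
// Note: the _lane_ variants splat the selected lane of %c with a
// shufflevector and then reuse the plain vqdmlal lowering.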
// CHECK-LABEL: define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4
// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4
// CHECK: ret <4 x i32> [[VQDMLAL_V3_I]]
int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlal_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4
// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4
// CHECK: ret <2 x i64> [[VQDMLAL_V3_I]]
int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlal_lane_s32(a, b, c, 1);
}
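
// Note: the _n_ variants splat the scalar %c with insertelements instead of
// a lane shuffle, then follow the same vqdmull + vqadds pattern.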
// CHECK-LABEL: define <4 x i32> @test_vqdmlal_n_s16(<4 x i32> %a, <4 x i16> %b, i16 signext %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMLAL4_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL4_I]]) #4
// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQDMLAL_V6_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL5_I]]) #4
// CHECK: ret <4 x i32> [[VQDMLAL_V6_I]]
int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
  return vqdmlal_n_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vqdmlal_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMLAL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL2_I]]) #4
// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQDMLAL_V4_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL3_I]]) #4
// CHECK: ret <2 x i64> [[VQDMLAL_V4_I]]
int64x2_t test_vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
  return vqdmlal_n_s32(a, b, c);
}
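
// Note: vqdmlsl_* is the subtracting counterpart, a - sat(2 * b * c),
// lowered as vqdmull followed by llvm.arm.neon.vqsubs.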
// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4
// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4
// CHECK: ret <4 x i32> [[VQDMLSL_V3_I]]
int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlsl_s16(a, b, c);
}

// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4
// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4
// CHECK: ret <2 x i64> [[VQDMLSL_V3_I]]
int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlsl_s32(a, b, c);
}
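
// Note: as with vqdmlal, the lane variant of vqdmlsl splats the chosen lane
// of %c before the doubling multiply.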
{ 11839 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 11840 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 11841 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 11842 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 11843 // CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 11844 // CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 11845 // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4 11846 // CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 11847 // CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4 11848 // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] 11849 int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { 11850 return vqdmlal_lane_s16(a, b, c, 3); 11851 } 11852 11853 // CHECK-LABEL: define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { 11854 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1> 11855 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 11856 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 11857 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 11858 // CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 11859 // CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 11860 // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4 11861 // CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 11862 // CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4 11863 // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] 11864 int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { 11865 return vqdmlal_lane_s32(a, b, c, 1); 11866 } 11867 11868 11869 // CHECK-LABEL: define <4 x i32> @test_vqdmlal_n_s16(<4 x i32> %a, <4 x i16> %b, i16 signext %c) #0 { 11870 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 11871 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 11872 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 11873 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 11874 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 11875 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 11876 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 11877 // CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 11878 // CHECK: [[VQDMLAL4_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 11879 // CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL4_I]]) #4 11880 // CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 11881 // CHECK: [[VQDMLAL_V6_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL5_I]]) #4 11882 // CHECK: ret <4 x i32> [[VQDMLAL_V6_I]] 11883 int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) { 11884 return vqdmlal_n_s16(a, b, c); 11885 } 11886 11887 // CHECK-LABEL: define <2 x i64> @test_vqdmlal_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 { 11888 
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 11889 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 11890 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 11891 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 11892 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 11893 // CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 11894 // CHECK: [[VQDMLAL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 11895 // CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL2_I]]) #4 11896 // CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 11897 // CHECK: [[VQDMLAL_V4_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL3_I]]) #4 11898 // CHECK: ret <2 x i64> [[VQDMLAL_V4_I]] 11899 int64x2_t test_vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) { 11900 return vqdmlal_n_s32(a, b, c); 11901 } 11902 11903 11904 // CHECK-LABEL: define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { 11905 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 11906 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 11907 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> 11908 // CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 11909 // CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 11910 // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4 11911 // CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 11912 // CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4 11913 // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] 11914 int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) { 11915 return vqdmlsl_s16(a, b, c); 11916 } 11917 11918 // CHECK-LABEL: define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { 11919 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 11920 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 11921 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8> 11922 // CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 11923 // CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 11924 // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4 11925 // CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 11926 // CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4 11927 // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] 11928 int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) { 11929 return vqdmlsl_s32(a, b, c); 11930 } 11931 11932 11933 // CHECK-LABEL: define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { 11934 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 11935 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 11936 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 11937 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 11938 // CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 11939 // CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> 
[[TMP2]] to <4 x i16> 11940 // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4 11941 // CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 11942 // CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4 11943 // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] 11944 int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { 11945 return vqdmlsl_lane_s16(a, b, c, 3); 11946 } 11947 11948 // CHECK-LABEL: define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { 11949 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> <i32 1, i32 1> 11950 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 11951 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 11952 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 11953 // CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 11954 // CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 11955 // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4 11956 // CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 11957 // CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4 11958 // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] 11959 int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { 11960 return vqdmlsl_lane_s32(a, b, c, 1); 11961 } 11962 11963 11964 // CHECK-LABEL: define <4 x i32> @test_vqdmlsl_n_s16(<4 x i32> %a, <4 x i16> %b, i16 signext %c) #0 { 11965 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 11966 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 11967 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 11968 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 11969 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 11970 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 11971 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 11972 // CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 11973 // CHECK: [[VQDMLAL4_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 11974 // CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL4_I]]) #4 11975 // CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 11976 // CHECK: [[VQDMLSL_V6_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL5_I]]) #4 11977 // CHECK: ret <4 x i32> [[VQDMLSL_V6_I]] 11978 int32x4_t test_vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) { 11979 return vqdmlsl_n_s16(a, b, c); 11980 } 11981 11982 // CHECK-LABEL: define <2 x i64> @test_vqdmlsl_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 { 11983 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 11984 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 11985 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 11986 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 11987 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 11988 // CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 
11989 // CHECK: [[VQDMLAL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 11990 // CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL2_I]]) #4 11991 // CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 11992 // CHECK: [[VQDMLSL_V4_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL3_I]]) #4 11993 // CHECK: ret <2 x i64> [[VQDMLSL_V4_I]] 11994 int64x2_t test_vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) { 11995 return vqdmlsl_n_s32(a, b, c); 11996 } 11997 11998 11999 // CHECK-LABEL: define <4 x i16> @test_vqdmulh_s16(<4 x i16> %a, <4 x i16> %b) #0 { 12000 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 12001 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 12002 // CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 12003 // CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 12004 // CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #4 12005 // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8> 12006 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16> 12007 // CHECK: ret <4 x i16> [[TMP2]] 12008 int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) { 12009 return vqdmulh_s16(a, b); 12010 } 12011 12012 // CHECK-LABEL: define <2 x i32> @test_vqdmulh_s32(<2 x i32> %a, <2 x i32> %b) #0 { 12013 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 12014 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 12015 // CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 12016 // CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 12017 // CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #4 12018 // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8> 12019 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32> 12020 // CHECK: ret <2 x i32> [[TMP2]] 12021 int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) { 12022 return vqdmulh_s32(a, b); 12023 } 12024 12025 // CHECK-LABEL: define <8 x i16> @test_vqdmulhq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 12026 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 12027 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 12028 // CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 12029 // CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 12030 // CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #4 12031 // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8> 12032 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16> 12033 // CHECK: ret <8 x i16> [[TMP2]] 12034 int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) { 12035 return vqdmulhq_s16(a, b); 12036 } 12037 12038 // CHECK-LABEL: define <4 x i32> @test_vqdmulhq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 12039 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 12040 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 12041 // CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 12042 // CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 12043 // CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> 
@llvm.arm.neon.vqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #4 12044 // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8> 12045 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32> 12046 // CHECK: ret <4 x i32> [[TMP2]] 12047 int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) { 12048 return vqdmulhq_s32(a, b); 12049 } 12050 12051 12052 // CHECK-LABEL: define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %b) #0 { 12053 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 12054 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 12055 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 12056 // CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 12057 // CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 12058 // CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #4 12059 // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8> 12060 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16> 12061 // CHECK: ret <4 x i16> [[TMP2]] 12062 int16x4_t test_vqdmulh_lane_s16(int16x4_t a, int16x4_t b) { 12063 return vqdmulh_lane_s16(a, b, 3); 12064 } 12065 12066 // CHECK-LABEL: define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %b) #0 { 12067 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> <i32 1, i32 1> 12068 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 12069 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 12070 // CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 12071 // CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 12072 // CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #4 12073 // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8> 12074 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32> 12075 // CHECK: ret <2 x i32> [[TMP2]] 12076 int32x2_t test_vqdmulh_lane_s32(int32x2_t a, int32x2_t b) { 12077 return vqdmulh_lane_s32(a, b, 1); 12078 } 12079 12080 // CHECK-LABEL: define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %b) #0 { 12081 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 12082 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 12083 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> 12084 // CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 12085 // CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 12086 // CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #4 12087 // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8> 12088 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16> 12089 // CHECK: ret <8 x i16> [[TMP2]] 12090 int16x8_t test_vqdmulhq_lane_s16(int16x8_t a, int16x4_t b) { 12091 return vqdmulhq_lane_s16(a, b, 3); 12092 } 12093 12094 // CHECK-LABEL: define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %b) #0 { 12095 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 
12096 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 12097 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> 12098 // CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 12099 // CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 12100 // CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #4 12101 // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8> 12102 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32> 12103 // CHECK: ret <4 x i32> [[TMP2]] 12104 int32x4_t test_vqdmulhq_lane_s32(int32x4_t a, int32x2_t b) { 12105 return vqdmulhq_lane_s32(a, b, 1); 12106 } 12107 12108 12109 // CHECK-LABEL: define <4 x i16> @test_vqdmulh_n_s16(<4 x i16> %a, i16 signext %b) #0 { 12110 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 12111 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 12112 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 12113 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 12114 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 12115 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 12116 // CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 12117 // CHECK: [[VQDMULH_V4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 12118 // CHECK: [[VQDMULH_V5_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V4_I]]) #4 12119 // CHECK: [[VQDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V5_I]] to <8 x i8> 12120 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V6_I]] to <4 x i16> 12121 // CHECK: ret <4 x i16> [[TMP2]] 12122 int16x4_t test_vqdmulh_n_s16(int16x4_t a, int16_t b) { 12123 return vqdmulh_n_s16(a, b); 12124 } 12125 12126 // CHECK-LABEL: define <2 x i32> @test_vqdmulh_n_s32(<2 x i32> %a, i32 %b) #0 { 12127 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 12128 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 12129 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 12130 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 12131 // CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 12132 // CHECK: [[VQDMULH_V2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 12133 // CHECK: [[VQDMULH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V2_I]]) #4 12134 // CHECK: [[VQDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V3_I]] to <8 x i8> 12135 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V4_I]] to <2 x i32> 12136 // CHECK: ret <2 x i32> [[TMP2]] 12137 int32x2_t test_vqdmulh_n_s32(int32x2_t a, int32_t b) { 12138 return vqdmulh_n_s32(a, b); 12139 } 12140 12141 // CHECK-LABEL: define <8 x i16> @test_vqdmulhq_n_s16(<8 x i16> %a, i16 signext %b) #0 { 12142 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 12143 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0 12144 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1 12145 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2 12146 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3 12147 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> 
[[VECINIT3_I]], i16 %b, i32 4 12148 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5 12149 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6 12150 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7 12151 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8> 12152 // CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 12153 // CHECK: [[VQDMULHQ_V8_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 12154 // CHECK: [[VQDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V8_I]]) #4 12155 // CHECK: [[VQDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V9_I]] to <16 x i8> 12156 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V10_I]] to <8 x i16> 12157 // CHECK: ret <8 x i16> [[TMP2]] 12158 int16x8_t test_vqdmulhq_n_s16(int16x8_t a, int16_t b) { 12159 return vqdmulhq_n_s16(a, b); 12160 } 12161 12162 // CHECK-LABEL: define <4 x i32> @test_vqdmulhq_n_s32(<4 x i32> %a, i32 %b) #0 { 12163 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 12164 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0 12165 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1 12166 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2 12167 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3 12168 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8> 12169 // CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 12170 // CHECK: [[VQDMULHQ_V4_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 12171 // CHECK: [[VQDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V4_I]]) #4 12172 // CHECK: [[VQDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V5_I]] to <16 x i8> 12173 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V6_I]] to <4 x i32> 12174 // CHECK: ret <4 x i32> [[TMP2]] 12175 int32x4_t test_vqdmulhq_n_s32(int32x4_t a, int32_t b) { 12176 return vqdmulhq_n_s32(a, b); 12177 } 12178 12179 12180 // CHECK-LABEL: define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) #0 { 12181 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 12182 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 12183 // CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 12184 // CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 12185 // CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #4 12186 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> 12187 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32> 12188 // CHECK: ret <4 x i32> [[TMP2]] 12189 int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) { 12190 return vqdmull_s16(a, b); 12191 } 12192 12193 // CHECK-LABEL: define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) #0 { 12194 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 12195 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 12196 // CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 12197 // CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 12198 // CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #4 
12199 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> 12200 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64> 12201 // CHECK: ret <2 x i64> [[TMP2]] 12202 int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) { 12203 return vqdmull_s32(a, b); 12204 } 12205 12206 12207 // CHECK-LABEL: define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %b) #0 { 12208 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 12209 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 12210 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 12211 // CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 12212 // CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 12213 // CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #4 12214 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> 12215 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32> 12216 // CHECK: ret <4 x i32> [[TMP2]] 12217 int32x4_t test_vqdmull_lane_s16(int16x4_t a, int16x4_t b) { 12218 return vqdmull_lane_s16(a, b, 3); 12219 } 12220 12221 // CHECK-LABEL: define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %b) #0 { 12222 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> <i32 1, i32 1> 12223 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 12224 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 12225 // CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 12226 // CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 12227 // CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #4 12228 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> 12229 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64> 12230 // CHECK: ret <2 x i64> [[TMP2]] 12231 int64x2_t test_vqdmull_lane_s32(int32x2_t a, int32x2_t b) { 12232 return vqdmull_lane_s32(a, b, 1); 12233 } 12234 12235 12236 // CHECK-LABEL: define <4 x i32> @test_vqdmull_n_s16(<4 x i16> %a, i16 signext %b) #0 { 12237 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 12238 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 12239 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 12240 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 12241 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 12242 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 12243 // CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 12244 // CHECK: [[VQDMULL_V4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 12245 // CHECK: [[VQDMULL_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V4_I]]) #4 12246 // CHECK: [[VQDMULL_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V5_I]] to <16 x i8> 12247 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V6_I]] to <4 x i32> 12248 // CHECK: ret <4 x i32> [[TMP2]] 12249 int32x4_t test_vqdmull_n_s16(int16x4_t a, int16_t b) { 12250 return vqdmull_n_s16(a, b); 12251 } 12252 12253 // CHECK-LABEL: define <2 x i64> @test_vqdmull_n_s32(<2 x i32> %a, i32 %b) #0 { 12254 // CHECK: 
[[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 12255 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 12256 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 12257 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 12258 // CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 12259 // CHECK: [[VQDMULL_V2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 12260 // CHECK: [[VQDMULL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V2_I]]) #4 12261 // CHECK: [[VQDMULL_V4_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V3_I]] to <16 x i8> 12262 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V4_I]] to <2 x i64> 12263 // CHECK: ret <2 x i64> [[TMP2]] 12264 int64x2_t test_vqdmull_n_s32(int32x2_t a, int32_t b) { 12265 return vqdmull_n_s32(a, b); 12266 } 12267 12268 12269 // CHECK-LABEL: define <8 x i8> @test_vqmovn_s16(<8 x i16> %a) #0 { 12270 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 12271 // CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 12272 // CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> [[VQMOVN_V_I]]) #4 12273 // CHECK: ret <8 x i8> [[VQMOVN_V1_I]] 12274 int8x8_t test_vqmovn_s16(int16x8_t a) { 12275 return vqmovn_s16(a); 12276 } 12277 12278 // CHECK-LABEL: define <4 x i16> @test_vqmovn_s32(<4 x i32> %a) #0 { 12279 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 12280 // CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 12281 // CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> [[VQMOVN_V_I]]) #4 12282 // CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8> 12283 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <4 x i16> 12284 // CHECK: ret <4 x i16> [[TMP1]] 12285 int16x4_t test_vqmovn_s32(int32x4_t a) { 12286 return vqmovn_s32(a); 12287 } 12288 12289 // CHECK-LABEL: define <2 x i32> @test_vqmovn_s64(<2 x i64> %a) #0 { 12290 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 12291 // CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 12292 // CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> [[VQMOVN_V_I]]) #4 12293 // CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8> 12294 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <2 x i32> 12295 // CHECK: ret <2 x i32> [[TMP1]] 12296 int32x2_t test_vqmovn_s64(int64x2_t a) { 12297 return vqmovn_s64(a); 12298 } 12299 12300 // CHECK-LABEL: define <8 x i8> @test_vqmovn_u16(<8 x i16> %a) #0 { 12301 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 12302 // CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 12303 // CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> [[VQMOVN_V_I]]) #4 12304 // CHECK: ret <8 x i8> [[VQMOVN_V1_I]] 12305 uint8x8_t test_vqmovn_u16(uint16x8_t a) { 12306 return vqmovn_u16(a); 12307 } 12308 12309 // CHECK-LABEL: define <4 x i16> @test_vqmovn_u32(<4 x i32> %a) #0 { 12310 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 12311 // CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 12312 // CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> [[VQMOVN_V_I]]) #4 12313 // CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8> 12314 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <4 x i16> 
12315 // CHECK: ret <4 x i16> [[TMP1]] 12316 uint16x4_t test_vqmovn_u32(uint32x4_t a) { 12317 return vqmovn_u32(a); 12318 } 12319 12320 // CHECK-LABEL: define <2 x i32> @test_vqmovn_u64(<2 x i64> %a) #0 { 12321 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 12322 // CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 12323 // CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> [[VQMOVN_V_I]]) #4 12324 // CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8> 12325 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <2 x i32> 12326 // CHECK: ret <2 x i32> [[TMP1]] 12327 uint32x2_t test_vqmovn_u64(uint64x2_t a) { 12328 return vqmovn_u64(a); 12329 } 12330 12331 12332 // CHECK-LABEL: define <8 x i8> @test_vqmovun_s16(<8 x i16> %a) #0 { 12333 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 12334 // CHECK: [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 12335 // CHECK: [[VQMOVUN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> [[VQMOVUN_V_I]]) #4 12336 // CHECK: ret <8 x i8> [[VQMOVUN_V1_I]] 12337 uint8x8_t test_vqmovun_s16(int16x8_t a) { 12338 return vqmovun_s16(a); 12339 } 12340 12341 // CHECK-LABEL: define <4 x i16> @test_vqmovun_s32(<4 x i32> %a) #0 { 12342 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 12343 // CHECK: [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 12344 // CHECK: [[VQMOVUN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> [[VQMOVUN_V_I]]) #4 12345 // CHECK: [[VQMOVUN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVUN_V1_I]] to <8 x i8> 12346 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVUN_V2_I]] to <4 x i16> 12347 // CHECK: ret <4 x i16> [[TMP1]] 12348 uint16x4_t test_vqmovun_s32(int32x4_t a) { 12349 return vqmovun_s32(a); 12350 } 12351 12352 // CHECK-LABEL: define <2 x i32> @test_vqmovun_s64(<2 x i64> %a) #0 { 12353 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 12354 // CHECK: [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 12355 // CHECK: [[VQMOVUN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> [[VQMOVUN_V_I]]) #4 12356 // CHECK: [[VQMOVUN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I]] to <8 x i8> 12357 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVUN_V2_I]] to <2 x i32> 12358 // CHECK: ret <2 x i32> [[TMP1]] 12359 uint32x2_t test_vqmovun_s64(int64x2_t a) { 12360 return vqmovun_s64(a); 12361 } 12362 12363 12364 // CHECK-LABEL: define <8 x i8> @test_vqneg_s8(<8 x i8> %a) #0 { 12365 // CHECK: [[VQNEG_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %a) #4 12366 // CHECK: ret <8 x i8> [[VQNEG_V_I]] 12367 int8x8_t test_vqneg_s8(int8x8_t a) { 12368 return vqneg_s8(a); 12369 } 12370 12371 // CHECK-LABEL: define <4 x i16> @test_vqneg_s16(<4 x i16> %a) #0 { 12372 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 12373 // CHECK: [[VQNEG_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 12374 // CHECK: [[VQNEG_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> [[VQNEG_V_I]]) #4 12375 // CHECK: [[VQNEG_V2_I:%.*]] = bitcast <4 x i16> [[VQNEG_V1_I]] to <8 x i8> 12376 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQNEG_V2_I]] to <4 x i16> 12377 // CHECK: ret <4 x i16> [[TMP1]] 12378 int16x4_t test_vqneg_s16(int16x4_t a) { 12379 return vqneg_s16(a); 12380 } 12381 12382 // CHECK-LABEL: define <2 x i32> @test_vqneg_s32(<2 x i32> %a) #0 { 12383 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 12384 // CHECK: [[VQNEG_V_I:%.*]] = 
bitcast <8 x i8> [[TMP0]] to <2 x i32> 12385 // CHECK: [[VQNEG_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> [[VQNEG_V_I]]) #4 12386 // CHECK: [[VQNEG_V2_I:%.*]] = bitcast <2 x i32> [[VQNEG_V1_I]] to <8 x i8> 12387 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQNEG_V2_I]] to <2 x i32> 12388 // CHECK: ret <2 x i32> [[TMP1]] 12389 int32x2_t test_vqneg_s32(int32x2_t a) { 12390 return vqneg_s32(a); 12391 } 12392 12393 // CHECK-LABEL: define <16 x i8> @test_vqnegq_s8(<16 x i8> %a) #0 { 12394 // CHECK: [[VQNEGQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %a) #4 12395 // CHECK: ret <16 x i8> [[VQNEGQ_V_I]] 12396 int8x16_t test_vqnegq_s8(int8x16_t a) { 12397 return vqnegq_s8(a); 12398 } 12399 12400 // CHECK-LABEL: define <8 x i16> @test_vqnegq_s16(<8 x i16> %a) #0 { 12401 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 12402 // CHECK: [[VQNEGQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 12403 // CHECK: [[VQNEGQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> [[VQNEGQ_V_I]]) #4 12404 // CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <8 x i16> [[VQNEGQ_V1_I]] to <16 x i8> 12405 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VQNEGQ_V2_I]] to <8 x i16> 12406 // CHECK: ret <8 x i16> [[TMP1]] 12407 int16x8_t test_vqnegq_s16(int16x8_t a) { 12408 return vqnegq_s16(a); 12409 } 12410 12411 // CHECK-LABEL: define <4 x i32> @test_vqnegq_s32(<4 x i32> %a) #0 { 12412 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 12413 // CHECK: [[VQNEGQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 12414 // CHECK: [[VQNEGQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> [[VQNEGQ_V_I]]) #4 12415 // CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <4 x i32> [[VQNEGQ_V1_I]] to <16 x i8> 12416 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VQNEGQ_V2_I]] to <4 x i32> 12417 // CHECK: ret <4 x i32> [[TMP1]] 12418 int32x4_t test_vqnegq_s32(int32x4_t a) { 12419 return vqnegq_s32(a); 12420 } 12421 12422 12423 // CHECK-LABEL: define <4 x i16> @test_vqrdmulh_s16(<4 x i16> %a, <4 x i16> %b) #0 { 12424 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 12425 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 12426 // CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 12427 // CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 12428 // CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #4 12429 // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8> 12430 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16> 12431 // CHECK: ret <4 x i16> [[TMP2]] 12432 int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) { 12433 return vqrdmulh_s16(a, b); 12434 } 12435 12436 // CHECK-LABEL: define <2 x i32> @test_vqrdmulh_s32(<2 x i32> %a, <2 x i32> %b) #0 { 12437 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 12438 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 12439 // CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 12440 // CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 12441 // CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #4 12442 // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8> 12443 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32> 12444 // CHECK: ret <2 x i32> [[TMP2]] 12445 int32x2_t 
test_vqrdmulh_s32(int32x2_t a, int32x2_t b) { 12446 return vqrdmulh_s32(a, b); 12447 } 12448 12449 // CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 12450 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 12451 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 12452 // CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 12453 // CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 12454 // CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #4 12455 // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8> 12456 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16> 12457 // CHECK: ret <8 x i16> [[TMP2]] 12458 int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) { 12459 return vqrdmulhq_s16(a, b); 12460 } 12461 12462 // CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 12463 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 12464 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 12465 // CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 12466 // CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 12467 // CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #4 12468 // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8> 12469 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32> 12470 // CHECK: ret <4 x i32> [[TMP2]] 12471 int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) { 12472 return vqrdmulhq_s32(a, b); 12473 } 12474 12475 12476 // CHECK-LABEL: define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %b) #0 { 12477 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 12478 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 12479 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> 12480 // CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 12481 // CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 12482 // CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #4 12483 // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8> 12484 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16> 12485 // CHECK: ret <4 x i16> [[TMP2]] 12486 int16x4_t test_vqrdmulh_lane_s16(int16x4_t a, int16x4_t b) { 12487 return vqrdmulh_lane_s16(a, b, 3); 12488 } 12489 12490 // CHECK-LABEL: define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %b) #0 { 12491 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> <i32 1, i32 1> 12492 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 12493 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> 12494 // CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 12495 // CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 12496 // CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #4 12497 // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8> 12498 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> 
[[VQRDMULH_V3_I]] to <2 x i32> 12499 // CHECK: ret <2 x i32> [[TMP2]] 12500 int32x2_t test_vqrdmulh_lane_s32(int32x2_t a, int32x2_t b) { 12501 return vqrdmulh_lane_s32(a, b, 1); 12502 } 12503 12504 // CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %b) #0 { 12505 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 12506 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 12507 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> 12508 // CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 12509 // CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 12510 // CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #4 12511 // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8> 12512 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16> 12513 // CHECK: ret <8 x i16> [[TMP2]] 12514 int16x8_t test_vqrdmulhq_lane_s16(int16x8_t a, int16x4_t b) { 12515 return vqrdmulhq_lane_s16(a, b, 3); 12516 } 12517 12518 // CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %b) #0 { 12519 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 12520 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 12521 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> 12522 // CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 12523 // CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 12524 // CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #4 12525 // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8> 12526 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32> 12527 // CHECK: ret <4 x i32> [[TMP2]] 12528 int32x4_t test_vqrdmulhq_lane_s32(int32x4_t a, int32x2_t b) { 12529 return vqrdmulhq_lane_s32(a, b, 1); 12530 } 12531 12532 12533 // CHECK-LABEL: define <4 x i16> @test_vqrdmulh_n_s16(<4 x i16> %a, i16 signext %b) #0 { 12534 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 12535 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 12536 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 12537 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 12538 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 12539 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 12540 // CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 12541 // CHECK: [[VQRDMULH_V4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 12542 // CHECK: [[VQRDMULH_V5_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V4_I]]) #4 12543 // CHECK: [[VQRDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V5_I]] to <8 x i8> 12544 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V6_I]] to <4 x i16> 12545 // CHECK: ret <4 x i16> [[TMP2]] 12546 int16x4_t test_vqrdmulh_n_s16(int16x4_t a, int16_t b) { 12547 return vqrdmulh_n_s16(a, b); 12548 } 12549 12550 // CHECK-LABEL: define <2 x i32> @test_vqrdmulh_n_s32(<2 x i32> %a, i32 %b) #0 { 12551 // CHECK: [[TMP0:%.*]] = bitcast <2 x 
i32> %a to <8 x i8> 12552 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 12553 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 12554 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 12555 // CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 12556 // CHECK: [[VQRDMULH_V2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 12557 // CHECK: [[VQRDMULH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V2_I]]) #4 12558 // CHECK: [[VQRDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V3_I]] to <8 x i8> 12559 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V4_I]] to <2 x i32> 12560 // CHECK: ret <2 x i32> [[TMP2]] 12561 int32x2_t test_vqrdmulh_n_s32(int32x2_t a, int32_t b) { 12562 return vqrdmulh_n_s32(a, b); 12563 } 12564 12565 // CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_n_s16(<8 x i16> %a, i16 signext %b) #0 { 12566 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 12567 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0 12568 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1 12569 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2 12570 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3 12571 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4 12572 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5 12573 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6 12574 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7 12575 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8> 12576 // CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 12577 // CHECK: [[VQRDMULHQ_V8_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 12578 // CHECK: [[VQRDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V8_I]]) #4 12579 // CHECK: [[VQRDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V9_I]] to <16 x i8> 12580 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V10_I]] to <8 x i16> 12581 // CHECK: ret <8 x i16> [[TMP2]] 12582 int16x8_t test_vqrdmulhq_n_s16(int16x8_t a, int16_t b) { 12583 return vqrdmulhq_n_s16(a, b); 12584 } 12585 12586 // CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_n_s32(<4 x i32> %a, i32 %b) #0 { 12587 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 12588 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0 12589 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1 12590 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2 12591 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3 12592 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8> 12593 // CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 12594 // CHECK: [[VQRDMULHQ_V4_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 12595 // CHECK: [[VQRDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V4_I]]) #4 12596 // CHECK: [[VQRDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V5_I]] to <16 x i8> 12597 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V6_I]] to <4 x i32> 12598 // 
CHECK: ret <4 x i32> [[TMP2]] 12599 int32x4_t test_vqrdmulhq_n_s32(int32x4_t a, int32_t b) { 12600 return vqrdmulhq_n_s32(a, b); 12601 } 12602 12603 12604 // CHECK-LABEL: define <8 x i8> @test_vqrshl_s8(<8 x i8> %a, <8 x i8> %b) #0 { 12605 // CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %a, <8 x i8> %b) #4 12606 // CHECK: ret <8 x i8> [[VQRSHL_V_I]] 12607 int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) { 12608 return vqrshl_s8(a, b); 12609 } 12610 12611 // CHECK-LABEL: define <4 x i16> @test_vqrshl_s16(<4 x i16> %a, <4 x i16> %b) #0 { 12612 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 12613 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 12614 // CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 12615 // CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 12616 // CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> [[VQRSHL_V_I]], <4 x i16> [[VQRSHL_V1_I]]) #4 12617 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8> 12618 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <4 x i16> 12619 // CHECK: ret <4 x i16> [[TMP2]] 12620 int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) { 12621 return vqrshl_s16(a, b); 12622 } 12623 12624 // CHECK-LABEL: define <2 x i32> @test_vqrshl_s32(<2 x i32> %a, <2 x i32> %b) #0 { 12625 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 12626 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 12627 // CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 12628 // CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 12629 // CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> [[VQRSHL_V_I]], <2 x i32> [[VQRSHL_V1_I]]) #4 12630 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8> 12631 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <2 x i32> 12632 // CHECK: ret <2 x i32> [[TMP2]] 12633 int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) { 12634 return vqrshl_s32(a, b); 12635 } 12636 12637 // CHECK-LABEL: define <1 x i64> @test_vqrshl_s64(<1 x i64> %a, <1 x i64> %b) #0 { 12638 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 12639 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 12640 // CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 12641 // CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 12642 // CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> [[VQRSHL_V_I]], <1 x i64> [[VQRSHL_V1_I]]) #4 12643 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8> 12644 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <1 x i64> 12645 // CHECK: ret <1 x i64> [[TMP2]] 12646 int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) { 12647 return vqrshl_s64(a, b); 12648 } 12649 12650 // CHECK-LABEL: define <8 x i8> @test_vqrshl_u8(<8 x i8> %a, <8 x i8> %b) #0 { 12651 // CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 12652 // CHECK: ret <8 x i8> [[VQRSHL_V_I]] 12653 uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) { 12654 return vqrshl_u8(a, b); 12655 } 12656 12657 // CHECK-LABEL: define <4 x i16> @test_vqrshl_u16(<4 x i16> %a, <4 x i16> %b) #0 { 12658 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 12659 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 12660 // CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] 
to <4 x i16> 12661 // CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 12662 // CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> [[VQRSHL_V_I]], <4 x i16> [[VQRSHL_V1_I]]) #4 12663 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8> 12664 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <4 x i16> 12665 // CHECK: ret <4 x i16> [[TMP2]] 12666 uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) { 12667 return vqrshl_u16(a, b); 12668 } 12669 12670 // CHECK-LABEL: define <2 x i32> @test_vqrshl_u32(<2 x i32> %a, <2 x i32> %b) #0 { 12671 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 12672 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 12673 // CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 12674 // CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 12675 // CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> [[VQRSHL_V_I]], <2 x i32> [[VQRSHL_V1_I]]) #4 12676 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8> 12677 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <2 x i32> 12678 // CHECK: ret <2 x i32> [[TMP2]] 12679 uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) { 12680 return vqrshl_u32(a, b); 12681 } 12682 12683 // CHECK-LABEL: define <1 x i64> @test_vqrshl_u64(<1 x i64> %a, <1 x i64> %b) #0 { 12684 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 12685 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 12686 // CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 12687 // CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 12688 // CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> [[VQRSHL_V_I]], <1 x i64> [[VQRSHL_V1_I]]) #4 12689 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8> 12690 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <1 x i64> 12691 // CHECK: ret <1 x i64> [[TMP2]] 12692 uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) { 12693 return vqrshl_u64(a, b); 12694 } 12695 12696 // CHECK-LABEL: define <16 x i8> @test_vqrshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 12697 // CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %a, <16 x i8> %b) #4 12698 // CHECK: ret <16 x i8> [[VQRSHLQ_V_I]] 12699 int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) { 12700 return vqrshlq_s8(a, b); 12701 } 12702 12703 // CHECK-LABEL: define <8 x i16> @test_vqrshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 12704 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 12705 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 12706 // CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 12707 // CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 12708 // CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> [[VQRSHLQ_V_I]], <8 x i16> [[VQRSHLQ_V1_I]]) #4 12709 // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8> 12710 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <8 x i16> 12711 // CHECK: ret <8 x i16> [[TMP2]] 12712 int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) { 12713 return vqrshlq_s16(a, b); 12714 } 12715 12716 // CHECK-LABEL: define <4 x i32> @test_vqrshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 12717 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 12718 // CHECK: [[TMP1:%.*]] = bitcast <4 
x i32> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> [[VQRSHLQ_V_I]], <4 x i32> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) {
  return vqrshlq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqrshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> [[VQRSHLQ_V_I]], <2 x i64> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) {
  return vqrshlq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqrshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqrshlq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqrshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> [[VQRSHLQ_V_I]], <8 x i16> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqrshlq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqrshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> [[VQRSHLQ_V_I]], <4 x i32> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqrshlq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqrshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> [[VQRSHLQ_V_I]], <2 x i64> [[VQRSHLQ_V1_I]]) #4
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqrshlq_u64(a, b);
}

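// vqrshrn_n: saturating rounding shift right by an immediate, narrowing each
// lane to half width. In the checks below the right shift by 1 appears as a
// splat of -1 in the second operand of llvm.arm.neon.vqrshiftns/vqrshiftnu:
// the backend encodes right shifts as negative left-shift counts.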
// CHECK-LABEL: define <8 x i8> @test_vqrshrn_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQRSHRN_N1]]
int8x8_t test_vqrshrn_n_s16(int16x8_t a) {
  return vqrshrn_n_s16(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vqrshrn_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQRSHRN_N1]]
int16x4_t test_vqrshrn_n_s32(int32x4_t a) {
  return vqrshrn_n_s32(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vqrshrn_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQRSHRN_N1]]
int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
  return vqrshrn_n_s64(a, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vqrshrn_n_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQRSHRN_N1]]
uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) {
  return vqrshrn_n_u16(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vqrshrn_n_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQRSHRN_N1]]
uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) {
  return vqrshrn_n_u32(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vqrshrn_n_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQRSHRN_N1]]
uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) {
  return vqrshrn_n_u64(a, 1);
}

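// vqrshrun_n: the same rounding narrow as vqrshrn_n, but it takes signed
// input and saturates the result to the unsigned range (vqrshiftnsu).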
// CHECK-LABEL: define <8 x i8> @test_vqrshrun_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> [[VQRSHRUN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQRSHRUN_N1]]
uint8x8_t test_vqrshrun_n_s16(int16x8_t a) {
  return vqrshrun_n_s16(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vqrshrun_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> [[VQRSHRUN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQRSHRUN_N1]]
uint16x4_t test_vqrshrun_n_s32(int32x4_t a) {
  return vqrshrun_n_s32(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vqrshrun_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> [[VQRSHRUN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQRSHRUN_N1]]
uint32x2_t test_vqrshrun_n_s64(int64x2_t a) {
  return vqrshrun_n_s64(a, 1);
}

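// vqshl (by register): saturating shift left with per-lane counts taken from
// the second operand. The counts are signed, so a negative lane value shifts
// right; note that the unsigned variants still take a signed count vector.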
// CHECK-LABEL: define <8 x i8> @test_vqshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQSHL_V_I]]
int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) {
  return vqshl_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> [[VQSHL_V_I]], <4 x i16> [[VQSHL_V1_I]]) #4
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) {
  return vqshl_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> [[VQSHL_V_I]], <2 x i32> [[VQSHL_V1_I]]) #4
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) {
  return vqshl_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> [[VQSHL_V_I]], <1 x i64> [[VQSHL_V1_I]]) #4
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) {
  return vqshl_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vqshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQSHL_V_I]]
uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) {
  return vqshl_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> [[VQSHL_V_I]], <4 x i16> [[VQSHL_V1_I]]) #4
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) {
  return vqshl_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> [[VQSHL_V_I]], <2 x i32> [[VQSHL_V1_I]]) #4
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) {
  return vqshl_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> [[VQSHL_V_I]], <1 x i64> [[VQSHL_V1_I]]) #4
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) {
  return vqshl_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) {
  return vqshlq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> [[VQSHLQ_V_I]], <8 x i16> [[VQSHLQ_V1_I]]) #4
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) {
  return vqshlq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> [[VQSHLQ_V_I]], <4 x i32> [[VQSHLQ_V1_I]]) #4
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) {
  return vqshlq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> [[VQSHLQ_V_I]], <2 x i64> [[VQSHLQ_V1_I]]) #4
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) {
  return vqshlq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqshlq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> [[VQSHLQ_V_I]], <8 x i16> [[VQSHLQ_V1_I]]) #4
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqshlq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> [[VQSHLQ_V_I]], <4 x i32> [[VQSHLQ_V1_I]]) #4
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqshlq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> [[VQSHLQ_V_I]], <2 x i64> [[VQSHLQ_V1_I]]) #4
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqshlq_u64(a, b);
}

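// vqshlu_n: shifts signed input left by an immediate and saturates the result
// to the unsigned range (vqshiftsu). The immediate is a genuine left shift,
// so the splat constants below are positive, unlike the narrowing right
// shifts above.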
// CHECK-LABEL: define <8 x i8> @test_vqshlu_n_s8(<8 x i8> %a) #0 {
// CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %a, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <8 x i8> [[VQSHLU_N]]
uint8x8_t test_vqshlu_n_s8(int8x8_t a) {
  return vqshlu_n_s8(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vqshlu_n_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <4 x i16> [[VQSHLU_N1]]
uint16x4_t test_vqshlu_n_s16(int16x4_t a) {
  return vqshlu_n_s16(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vqshlu_n_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> <i32 1, i32 1>)
// CHECK: ret <2 x i32> [[VQSHLU_N1]]
uint32x2_t test_vqshlu_n_s32(int32x2_t a) {
  return vqshlu_n_s32(a, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vqshlu_n_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VQSHLU_N1]]
uint64x1_t test_vqshlu_n_s64(int64x1_t a) {
  return vqshlu_n_s64(a, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vqshluq_n_s8(<16 x i8> %a) #0 {
// CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %a, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <16 x i8> [[VQSHLU_N]]
uint8x16_t test_vqshluq_n_s8(int8x16_t a) {
  return vqshluq_n_s8(a, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vqshluq_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <8 x i16> [[VQSHLU_N1]]
uint16x8_t test_vqshluq_n_s16(int16x8_t a) {
  return vqshluq_n_s16(a, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vqshluq_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK: ret <4 x i32> [[VQSHLU_N1]]
uint32x4_t test_vqshluq_n_s32(int32x4_t a) {
  return vqshluq_n_s32(a, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vqshluq_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> <i64 1, i64 1>)
// CHECK: ret <2 x i64> [[VQSHLU_N1]]
uint64x2_t test_vqshluq_n_s64(int64x2_t a) {
  return vqshluq_n_s64(a, 1);
}

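// vqshl_n: immediate form of the saturating left shift. It lowers to the same
// vqshifts/vqshiftu intrinsics as the register form, with the count splatted
// into a constant vector.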
// CHECK-LABEL: define <8 x i8> @test_vqshl_n_s8(<8 x i8> %a) #0 {
// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <8 x i8> [[VQSHL_N]]
int8x8_t test_vqshl_n_s8(int8x8_t a) {
  return vqshl_n_s8(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vqshl_n_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <4 x i16> [[VQSHL_N1]]
int16x4_t test_vqshl_n_s16(int16x4_t a) {
  return vqshl_n_s16(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vqshl_n_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> <i32 1, i32 1>)
// CHECK: ret <2 x i32> [[VQSHL_N1]]
int32x2_t test_vqshl_n_s32(int32x2_t a) {
  return vqshl_n_s32(a, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vqshl_n_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VQSHL_N1]]
int64x1_t test_vqshl_n_s64(int64x1_t a) {
  return vqshl_n_s64(a, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vqshl_n_u8(<8 x i8> %a) #0 {
// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <8 x i8> [[VQSHL_N]]
uint8x8_t test_vqshl_n_u8(uint8x8_t a) {
  return vqshl_n_u8(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vqshl_n_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <4 x i16> [[VQSHL_N1]]
uint16x4_t test_vqshl_n_u16(uint16x4_t a) {
  return vqshl_n_u16(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vqshl_n_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> <i32 1, i32 1>)
// CHECK: ret <2 x i32> [[VQSHL_N1]]
uint32x2_t test_vqshl_n_u32(uint32x2_t a) {
  return vqshl_n_u32(a, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vqshl_n_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VQSHL_N1]]
uint64x1_t test_vqshl_n_u64(uint64x1_t a) {
  return vqshl_n_u64(a, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) #0 {
// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <16 x i8> [[VQSHL_N]]
int8x16_t test_vqshlq_n_s8(int8x16_t a) {
  return vqshlq_n_s8(a, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <8 x i16> [[VQSHL_N1]]
int16x8_t test_vqshlq_n_s16(int16x8_t a) {
  return vqshlq_n_s16(a, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK: ret <4 x i32> [[VQSHL_N1]]
int32x4_t test_vqshlq_n_s32(int32x4_t a) {
  return vqshlq_n_s32(a, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vqshlq_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> <i64 1, i64 1>)
// CHECK: ret <2 x i64> [[VQSHL_N1]]
int64x2_t test_vqshlq_n_s64(int64x2_t a) {
  return vqshlq_n_s64(a, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) #0 {
// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <16 x i8> [[VQSHL_N]]
uint8x16_t test_vqshlq_n_u8(uint8x16_t a) {
  return vqshlq_n_u8(a, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <8 x i16> [[VQSHL_N1]]
uint16x8_t test_vqshlq_n_u16(uint16x8_t a) {
  return vqshlq_n_u16(a, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK: ret <4 x i32> [[VQSHL_N1]]
uint32x4_t test_vqshlq_n_u32(uint32x4_t a) {
  return vqshlq_n_u32(a, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vqshlq_n_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> <i64 1, i64 1>)
// CHECK: ret <2 x i64> [[VQSHL_N1]]
uint64x2_t test_vqshlq_n_u64(uint64x2_t a) {
  return vqshlq_n_u64(a, 1);
}

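// vqshrn_n: saturating shift right and narrow by an immediate, without
// rounding; as with vqrshrn_n, the shift by 1 appears as a splat of -1.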
// CHECK-LABEL: define <8 x i8> @test_vqshrn_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQSHRN_N1]]
int8x8_t test_vqshrn_n_s16(int16x8_t a) {
  return vqshrn_n_s16(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vqshrn_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQSHRN_N1]]
int16x4_t test_vqshrn_n_s32(int32x4_t a) {
  return vqshrn_n_s32(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vqshrn_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQSHRN_N1]]
int32x2_t test_vqshrn_n_s64(int64x2_t a) {
  return vqshrn_n_s64(a, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vqshrn_n_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQSHRN_N1]]
uint8x8_t test_vqshrn_n_u16(uint16x8_t a) {
  return vqshrn_n_u16(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vqshrn_n_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQSHRN_N1]]
uint16x4_t test_vqshrn_n_u32(uint32x4_t a) {
  return vqshrn_n_u32(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vqshrn_n_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQSHRN_N1]]
uint32x2_t test_vqshrn_n_u64(uint64x2_t a) {
  return vqshrn_n_u64(a, 1);
}

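// vqshrun_n: signed-to-unsigned saturating shift right and narrow
// (vqshiftnsu), the non-rounding counterpart of vqrshrun_n.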
// CHECK-LABEL: define <8 x i8> @test_vqshrun_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> [[VQSHRUN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQSHRUN_N1]]
uint8x8_t test_vqshrun_n_s16(int16x8_t a) {
  return vqshrun_n_s16(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vqshrun_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> [[VQSHRUN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQSHRUN_N1]]
uint16x4_t test_vqshrun_n_s32(int32x4_t a) {
  return vqshrun_n_s32(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vqshrun_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> [[VQSHRUN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQSHRUN_N1]]
uint32x2_t test_vqshrun_n_s64(int64x2_t a) {
  return vqshrun_n_s64(a, 1);
}

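// vqsub: lane-wise saturating subtraction; results clamp to the element
// type's range instead of wrapping (vqsubs signed, vqsubu unsigned).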
// CHECK-LABEL: define <8 x i8> @test_vqsub_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQSUB_V_I]]
int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) {
  return vqsub_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqsub_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> [[VQSUB_V_I]], <4 x i16> [[VQSUB_V1_I]]) #4
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) {
  return vqsub_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqsub_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> [[VQSUB_V_I]], <2 x i32> [[VQSUB_V1_I]]) #4
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) {
  return vqsub_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqsub_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> [[VQSUB_V_I]], <1 x i64> [[VQSUB_V1_I]]) #4
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) {
  return vqsub_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vqsub_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VQSUB_V_I]]
uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) {
  return vqsub_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vqsub_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> [[VQSUB_V_I]], <4 x i16> [[VQSUB_V1_I]]) #4
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) {
  return vqsub_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vqsub_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> [[VQSUB_V_I]], <2 x i32> [[VQSUB_V1_I]]) #4
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) {
  return vqsub_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vqsub_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> [[VQSUB_V_I]], <1 x i64> [[VQSUB_V1_I]]) #4
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) {
  return vqsub_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqsubq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) {
  return vqsubq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqsubq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> [[VQSUBQ_V_I]], <8 x i16> [[VQSUBQ_V1_I]]) #4
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) {
  return vqsubq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqsubq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> [[VQSUBQ_V_I]], <4 x i32> [[VQSUBQ_V1_I]]) #4
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) {
  return vqsubq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqsubq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> [[VQSUBQ_V_I]], <2 x i64> [[VQSUBQ_V1_I]]) #4
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) {
  return vqsubq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vqsubq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) {
  return vqsubq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vqsubq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> [[VQSUBQ_V_I]], <8 x i16> [[VQSUBQ_V1_I]]) #4
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) {
  return vqsubq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vqsubq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> [[VQSUBQ_V_I]], <4 x i32> [[VQSUBQ_V1_I]]) #4
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) {
  return vqsubq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vqsubq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> [[VQSUBQ_V_I]], <2 x i64> [[VQSUBQ_V1_I]]) #4
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) {
  return vqsubq_u64(a, b);
}

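// vraddhn: rounding add and narrow, keeping the high half of each sum, so the
// result lanes are half the width of the inputs.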
// CHECK-LABEL: define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> [[VRADDHN_V_I]], <8 x i16> [[VRADDHN_V1_I]]) #4
// CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) {
  return vraddhn_s16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> [[VRADDHN_V_I]], <4 x i32> [[VRADDHN_V1_I]]) #4
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) {
  return vraddhn_s32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> [[VRADDHN_V_I]], <2 x i64> [[VRADDHN_V1_I]]) #4
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) {
  return vraddhn_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> [[VRADDHN_V_I]], <8 x i16> [[VRADDHN_V1_I]]) #4
// CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) {
  return vraddhn_u16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> [[VRADDHN_V_I]], <4 x i32> [[VRADDHN_V1_I]]) #4
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vraddhn_u32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> [[VRADDHN_V_I]], <2 x i64> [[VRADDHN_V1_I]]) #4
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vraddhn_u64(a, b);
}

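// vrecpe: reciprocal estimate for float and u32 lanes. On its own it is only
// accurate to a few bits; it serves as the seed for the Newton-Raphson
// refinement performed with vrecps below.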
// CHECK-LABEL: define <2 x float> @test_vrecpe_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VRECPE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> [[VRECPE_V_I]]) #4
// CHECK: ret <2 x float> [[VRECPE_V1_I]]
float32x2_t test_vrecpe_f32(float32x2_t a) {
  return vrecpe_f32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vrecpe_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRECPE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> [[VRECPE_V_I]]) #4
// CHECK: ret <2 x i32> [[VRECPE_V1_I]]
uint32x2_t test_vrecpe_u32(uint32x2_t a) {
  return vrecpe_u32(a);
}

// CHECK-LABEL: define <4 x float> @test_vrecpeq_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VRECPEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> [[VRECPEQ_V_I]]) #4
// CHECK: ret <4 x float> [[VRECPEQ_V1_I]]
float32x4_t test_vrecpeq_f32(float32x4_t a) {
  return vrecpeq_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vrecpeq_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRECPEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> [[VRECPEQ_V_I]]) #4
// CHECK: ret <4 x i32> [[VRECPEQ_V1_I]]
uint32x4_t test_vrecpeq_u32(uint32x4_t a) {
  return vrecpeq_u32(a);
}

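// vrecps: Newton-Raphson reciprocal step, computing 2 - a*b per lane. A
// typical refinement of a vrecpe seed (illustrative only, not part of this
// test's checked output) looks like:
//   float32x2_t x = vrecpe_f32(d);
//   x = vmul_f32(x, vrecps_f32(d, x));  // first Newton-Raphson iteration
//   x = vmul_f32(x, vrecps_f32(d, x));  // second iteration for more bits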
// CHECK-LABEL: define <2 x float> @test_vrecps_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VRECPS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VRECPS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> [[VRECPS_V_I]], <2 x float> [[VRECPS_V1_I]]) #4
// CHECK: [[VRECPS_V3_I:%.*]] = bitcast <2 x float> [[VRECPS_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRECPS_V3_I]] to <2 x float>
// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vrecps_f32(float32x2_t a, float32x2_t b) {
  return vrecps_f32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vrecpsq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VRECPSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VRECPSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> [[VRECPSQ_V_I]], <4 x float> [[VRECPSQ_V1_I]]) #4
// CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRECPSQ_V3_I]] to <4 x float>
// CHECK: ret <4 x float> [[TMP2]]
float32x4_t test_vrecpsq_f32(float32x4_t a, float32x4_t b) {
  return vrecpsq_f32(a, b);
}

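// vreinterpret: reinterprets the bit pattern of one 64-bit vector type as
// another. Casts between identically laid out types return the operand
// unchanged; everything else lowers to a single bitcast.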
<1 x i64> %a to <4 x i16> 13788 // CHECK: ret <4 x i16> [[TMP0]] 13789 int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) { 13790 return vreinterpret_s16_u64(a); 13791 } 13792 13793 // CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f16(<4 x half> %a) #0 { 13794 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16> 13795 // CHECK: ret <4 x i16> [[TMP0]] 13796 int16x4_t test_vreinterpret_s16_f16(float16x4_t a) { 13797 return vreinterpret_s16_f16(a); 13798 } 13799 13800 // CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f32(<2 x float> %a) #0 { 13801 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16> 13802 // CHECK: ret <4 x i16> [[TMP0]] 13803 int16x4_t test_vreinterpret_s16_f32(float32x2_t a) { 13804 return vreinterpret_s16_f32(a); 13805 } 13806 13807 // CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p8(<8 x i8> %a) #0 { 13808 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> 13809 // CHECK: ret <4 x i16> [[TMP0]] 13810 int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) { 13811 return vreinterpret_s16_p8(a); 13812 } 13813 13814 // CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p16(<4 x i16> %a) #0 { 13815 // CHECK: ret <4 x i16> %a 13816 int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) { 13817 return vreinterpret_s16_p16(a); 13818 } 13819 13820 // CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s8(<8 x i8> %a) #0 { 13821 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> 13822 // CHECK: ret <2 x i32> [[TMP0]] 13823 int32x2_t test_vreinterpret_s32_s8(int8x8_t a) { 13824 return vreinterpret_s32_s8(a); 13825 } 13826 13827 // CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s16(<4 x i16> %a) #0 { 13828 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> 13829 // CHECK: ret <2 x i32> [[TMP0]] 13830 int32x2_t test_vreinterpret_s32_s16(int16x4_t a) { 13831 return vreinterpret_s32_s16(a); 13832 } 13833 13834 // CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s64(<1 x i64> %a) #0 { 13835 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> 13836 // CHECK: ret <2 x i32> [[TMP0]] 13837 int32x2_t test_vreinterpret_s32_s64(int64x1_t a) { 13838 return vreinterpret_s32_s64(a); 13839 } 13840 13841 // CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u8(<8 x i8> %a) #0 { 13842 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> 13843 // CHECK: ret <2 x i32> [[TMP0]] 13844 int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) { 13845 return vreinterpret_s32_u8(a); 13846 } 13847 13848 // CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u16(<4 x i16> %a) #0 { 13849 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> 13850 // CHECK: ret <2 x i32> [[TMP0]] 13851 int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) { 13852 return vreinterpret_s32_u16(a); 13853 } 13854 13855 // CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u32(<2 x i32> %a) #0 { 13856 // CHECK: ret <2 x i32> %a 13857 int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) { 13858 return vreinterpret_s32_u32(a); 13859 } 13860 13861 // CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u64(<1 x i64> %a) #0 { 13862 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> 13863 // CHECK: ret <2 x i32> [[TMP0]] 13864 int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) { 13865 return vreinterpret_s32_u64(a); 13866 } 13867 13868 // CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f16(<4 x half> %a) #0 { 13869 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32> 13870 // CHECK: ret <2 x i32> [[TMP0]] 13871 int32x2_t test_vreinterpret_s32_f16(float16x4_t 
  return vreinterpret_s32_f16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f32(float32x2_t a) {
  return vreinterpret_s32_f32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) {
  return vreinterpret_s32_p8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) {
  return vreinterpret_s32_p16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s8(int8x8_t a) {
  return vreinterpret_s64_s8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s16(int16x4_t a) {
  return vreinterpret_s64_s16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s32(int32x2_t a) {
  return vreinterpret_s64_s32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) {
  return vreinterpret_s64_u8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) {
  return vreinterpret_s64_u16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) {
  return vreinterpret_s64_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u64(<1 x i64> %a) #0 {
// CHECK: ret <1 x i64> %a
int64x1_t test_vreinterpret_s64_u64(uint64x1_t a) {
  return vreinterpret_s64_u64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f16(float16x4_t a) {
  return vreinterpret_s64_f16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f32(float32x2_t a) {
  return vreinterpret_s64_f32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) {
  return vreinterpret_s64_p8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) {
  return vreinterpret_s64_p16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s8(<8 x i8> %a) #0 {
// CHECK: ret <8 x i8> %a
uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) {
  return vreinterpret_u8_s8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) {
  return vreinterpret_u8_s16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s32(int32x2_t a) {
  return vreinterpret_u8_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) {
  return vreinterpret_u8_s64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) {
  return vreinterpret_u8_u16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) {
  return vreinterpret_u8_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) {
  return vreinterpret_u8_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) {
  return vreinterpret_u8_f16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) {
  return vreinterpret_u8_f32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p8(<8 x i8> %a) #0 {
// CHECK: ret <8 x i8> %a
uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) {
  return vreinterpret_u8_p8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) {
  return vreinterpret_u8_p16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) {
  return vreinterpret_u16_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s16(<4 x i16> %a) #0 {
// CHECK: ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) {
  return vreinterpret_u16_s16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) {
  return vreinterpret_u16_s32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) {
  return vreinterpret_u16_s64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) {
  return vreinterpret_u16_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) {
  return vreinterpret_u16_u32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) {
  return vreinterpret_u16_u64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) {
  return vreinterpret_u16_f16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) {
  return vreinterpret_u16_f32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) {
  return vreinterpret_u16_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p16(<4 x i16> %a) #0 {
// CHECK: ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) {
  return vreinterpret_u16_p16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) {
  return vreinterpret_u32_s8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) {
  return vreinterpret_u32_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s32(<2 x i32> %a) #0 {
// CHECK: ret <2 x i32> %a
uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) {
  return vreinterpret_u32_s32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) {
  return vreinterpret_u32_s64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) {
  return vreinterpret_u32_u8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) {
  return vreinterpret_u32_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) {
  return vreinterpret_u32_u64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) {
  return vreinterpret_u32_f16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) {
  return vreinterpret_u32_f32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p8(poly8x8_t a) {
  return vreinterpret_u32_p8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) {
  return vreinterpret_u32_p16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) {
  return vreinterpret_u64_s8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) {
  return vreinterpret_u64_s16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) {
  return vreinterpret_u64_s32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s64(<1 x i64> %a) #0 {
// CHECK: ret <1 x i64> %a
uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) {
  return vreinterpret_u64_s64(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) {
  return vreinterpret_u64_u8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) {
  return vreinterpret_u64_u16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) {
  return vreinterpret_u64_u32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) {
  return vreinterpret_u64_f16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) {
  return vreinterpret_u64_f32(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) {
  return vreinterpret_u64_p8(a);
}

// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) {
  return vreinterpret_u64_p16(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s8(int8x8_t a) {
  return vreinterpret_f16_s8(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s16(int16x4_t a) {
  return vreinterpret_f16_s16(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s32(int32x2_t a) {
  return vreinterpret_f16_s32(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s64(int64x1_t a) {
  return vreinterpret_f16_s64(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) {
  return vreinterpret_f16_u8(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) {
  return vreinterpret_f16_u16(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) {
  return vreinterpret_f16_u32(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) {
  return vreinterpret_f16_u64(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_f32(float32x2_t a) {
  return vreinterpret_f16_f32(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) {
  return vreinterpret_f16_p8(a);
}

// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) {
  return vreinterpret_f16_p16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s8(int8x8_t a) {
  return vreinterpret_f32_s8(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s16(int16x4_t a) {
  return vreinterpret_f32_s16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s32(int32x2_t a) {
  return vreinterpret_f32_s32(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s64(int64x1_t a) {
  return vreinterpret_f32_s64(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) {
  return vreinterpret_f32_u8(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) {
  return vreinterpret_f32_u16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) {
  return vreinterpret_f32_u32(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) {
  return vreinterpret_f32_u64(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_f16(float16x4_t a) {
  return vreinterpret_f32_f16(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) {
  return vreinterpret_f32_p8(a);
}

// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) {
  return vreinterpret_f32_p16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s8(<8 x i8> %a) #0 {
// CHECK: ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) {
  return vreinterpret_p8_s8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s16(int16x4_t a) {
  return vreinterpret_p8_s16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) {
  return vreinterpret_p8_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) {
  return vreinterpret_p8_s64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u8(<8 x i8> %a) #0 {
// CHECK: ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) {
  return vreinterpret_p8_u8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) {
  return vreinterpret_p8_u16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) {
  return vreinterpret_p8_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) {
  return vreinterpret_p8_u64(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) {
  return vreinterpret_p8_f16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) {
  return vreinterpret_p8_f32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) {
  return vreinterpret_p8_p16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) {
  return vreinterpret_p16_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s16(<4 x i16> %a) #0 {
// CHECK: ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) {
  return vreinterpret_p16_s16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) {
  return vreinterpret_p16_s32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) {
  return vreinterpret_p16_s64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) {
  return vreinterpret_p16_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u16(<4 x i16> %a) #0 {
// CHECK: ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) {
  return vreinterpret_p16_u16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) {
  return vreinterpret_p16_u32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) {
  return vreinterpret_p16_u64(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f16(<4 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) {
  return vreinterpret_p16_f16(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) {
  return vreinterpret_p16_f32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_p8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) {
  return vreinterpret_p16_p8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) {
  return vreinterpretq_s8_s16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) {
  return vreinterpretq_s8_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) {
  return vreinterpretq_s8_s64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) {
  return vreinterpretq_s8_u8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) {
  return vreinterpretq_s8_u16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) {
  return vreinterpretq_s8_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) {
  return vreinterpretq_s8_u64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) {
  return vreinterpretq_s8_f16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) {
  return vreinterpretq_s8_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) {
  return vreinterpretq_s8_p8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) {
  return vreinterpretq_s8_p16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) {
  return vreinterpretq_s16_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) {
  return vreinterpretq_s16_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) {
  return vreinterpretq_s16_s64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) {
  return vreinterpretq_s16_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) {
  return vreinterpretq_s16_u16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) {
  return vreinterpretq_s16_u32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) {
  return vreinterpretq_s16_u64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) {
  return vreinterpretq_s16_f16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) {
  return vreinterpretq_s16_f32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) {
  return vreinterpretq_s16_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) {
  return vreinterpretq_s16_p16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) {
  return vreinterpretq_s32_s8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) {
  return vreinterpretq_s32_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) {
  return vreinterpretq_s32_s64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) {
  return vreinterpretq_s32_u8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) {
  return vreinterpretq_s32_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u32(<4 x i32> %a) #0 {
// CHECK: ret <4 x i32> %a
int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) {
  return vreinterpretq_s32_u32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) {
  return vreinterpretq_s32_u64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) {
  return vreinterpretq_s32_f16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) {
  return vreinterpretq_s32_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) {
  return vreinterpretq_s32_p8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) {
  return vreinterpretq_s32_p16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) {
  return vreinterpretq_s64_s8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) {
  return vreinterpretq_s64_s16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) {
  return vreinterpretq_s64_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) {
  return vreinterpretq_s64_u8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) {
  return vreinterpretq_s64_u16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) {
  return vreinterpretq_s64_u32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u64(<2 x i64> %a) #0 {
// CHECK: ret <2 x i64> %a
int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) {
  return vreinterpretq_s64_u64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) {
  return vreinterpretq_s64_f16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) {
  return vreinterpretq_s64_f32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) {
  return vreinterpretq_s64_p8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) {
  return vreinterpretq_s64_p16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) {
  return vreinterpretq_u8_s8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) {
  return vreinterpretq_u8_s16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) {
  return vreinterpretq_u8_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) {
  return vreinterpretq_u8_s64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) {
  return vreinterpretq_u8_u16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) {
  return vreinterpretq_u8_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) {
  return vreinterpretq_u8_u64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) {
  return vreinterpretq_u8_f16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) {
  return vreinterpretq_u8_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) {
  return vreinterpretq_u8_p8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) {
  return vreinterpretq_u8_p16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) {
  return vreinterpretq_u16_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) {
  return vreinterpretq_u16_s16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) {
  return vreinterpretq_u16_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) {
  return vreinterpretq_u16_s64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) {
  return vreinterpretq_u16_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) {
  return vreinterpretq_u16_u32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) {
  return vreinterpretq_u16_u64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) {
  return vreinterpretq_u16_f16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) {
  return vreinterpretq_u16_f32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) {
  return vreinterpretq_u16_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) {
  return vreinterpretq_u16_p16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) {
  return vreinterpretq_u32_s8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) {
  return vreinterpretq_u32_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s32(<4 x i32> %a) #0 {
// CHECK: ret <4 x i32> %a
uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) {
  return vreinterpretq_u32_s32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) {
  return vreinterpretq_u32_s64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) {
  return vreinterpretq_u32_u8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) {
  return vreinterpretq_u32_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) {
  return vreinterpretq_u32_u64(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) {
  return vreinterpretq_u32_f16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) {
  return vreinterpretq_u32_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) {
  return vreinterpretq_u32_p8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) {
  return vreinterpretq_u32_p16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) {
  return vreinterpretq_u64_s8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) {
  return vreinterpretq_u64_s16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) {
  return vreinterpretq_u64_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s64(<2 x i64> %a) #0 {
// CHECK: ret <2 x i64> %a
uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) {
  return vreinterpretq_u64_s64(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) {
  return vreinterpretq_u64_u8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) {
  return vreinterpretq_u64_u16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) {
  return vreinterpretq_u64_u32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) {
  return vreinterpretq_u64_f16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) {
  return vreinterpretq_u64_f32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) {
  return vreinterpretq_u64_p8(a);
}

// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) {
  return vreinterpretq_u64_p16(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) {
  return vreinterpretq_f16_s8(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) {
  return vreinterpretq_f16_s16(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) {
  return vreinterpretq_f16_s32(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) {
  return vreinterpretq_f16_s64(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) {
  return vreinterpretq_f16_u8(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) {
  return vreinterpretq_f16_u16(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) {
  return vreinterpretq_f16_u32(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) {
  return vreinterpretq_f16_u64(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) {
  return vreinterpretq_f16_f32(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) {
  return vreinterpretq_f16_p8(a);
}

// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) {
  return vreinterpretq_f16_p16(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) {
  return vreinterpretq_f32_s8(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) {
  return vreinterpretq_f32_s16(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) {
  return vreinterpretq_f32_s32(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) {
  return vreinterpretq_f32_s64(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) {
  return vreinterpretq_f32_u8(a);
}
// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) {
  return vreinterpretq_f32_u16(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) {
  return vreinterpretq_f32_u32(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) {
  return vreinterpretq_f32_u64(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) {
  return vreinterpretq_f32_f16(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) {
  return vreinterpretq_f32_p8(a);
}

// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) {
  return vreinterpretq_f32_p16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) {
  return vreinterpretq_p8_s8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) {
  return vreinterpretq_p8_s16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) {
  return vreinterpretq_p8_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) {
  return vreinterpretq_p8_s64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u8(<16 x i8> %a) #0 {
// CHECK: ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) {
  return vreinterpretq_p8_u8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) {
  return vreinterpretq_p8_u16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) {
  return vreinterpretq_p8_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) {
  return vreinterpretq_p8_u64(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) {
  return vreinterpretq_p8_f16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) {
  return vreinterpretq_p8_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_p16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) {
  return vreinterpretq_p8_p16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) {
  return vreinterpretq_p16_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) {
  return vreinterpretq_p16_s16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) {
  return vreinterpretq_p16_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) {
  return vreinterpretq_p16_s64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) {
  return vreinterpretq_p16_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u16(<8 x i16> %a) #0 {
// CHECK: ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) {
  return vreinterpretq_p16_u16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) {
  return vreinterpretq_p16_u32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) {
  return vreinterpretq_p16_u64(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_f16(<8 x half> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) {
  return vreinterpretq_p16_f16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) {
  return vreinterpretq_p16_f32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_p8(<16 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) {
  return vreinterpretq_p16_p8(a);
}

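// The vrevN tests below exercise the element-reversal intrinsics: vrevN
// reverses the elements within each N-bit group of the vector, so it is
// expected to lower to a single shufflevector with a constant reversed-index
// mask (e.g. vrev16 on i8 lanes swaps each adjacent pair of bytes).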
// CHECK-LABEL: define <8 x i8> @test_vrev16_s8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vrev16_s8(int8x8_t a) {
  return vrev16_s8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vrev16_u8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vrev16_u8(uint8x8_t a) {
  return vrev16_u8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vrev16_p8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vrev16_p8(poly8x8_t a) {
  return vrev16_p8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev16q_s8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrev16q_s8(int8x16_t a) {
  return vrev16q_s8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev16q_u8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrev16q_u8(uint8x16_t a) {
  return vrev16q_u8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev16q_p8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vrev16q_p8(poly8x16_t a) {
  return vrev16q_p8(a);
}


// CHECK-LABEL: define <8 x i8> @test_vrev32_s8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vrev32_s8(int8x8_t a) {
  return vrev32_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vrev32_s16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vrev32_s16(int16x4_t a) {
  return vrev32_s16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vrev32_u8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vrev32_u8(uint8x8_t a) {
  return vrev32_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vrev32_u16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vrev32_u16(uint16x4_t a) {
  return vrev32_u16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vrev32_p8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vrev32_p8(poly8x8_t a) {
  return vrev32_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vrev32_p16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vrev32_p16(poly16x4_t a) {
  return vrev32_p16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev32q_s8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrev32q_s8(int8x16_t a) {
  return vrev32q_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vrev32q_s16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vrev32q_s16(int16x8_t a) {
  return vrev32q_s16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev32q_u8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrev32q_u8(uint8x16_t a) {
  return vrev32q_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vrev32q_u16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vrev32q_u16(uint16x8_t a) {
  return vrev32q_u16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev32q_p8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vrev32q_p8(poly8x16_t a) {
  return vrev32q_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vrev32q_p16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
poly16x8_t test_vrev32q_p16(poly16x8_t a) {
  return vrev32q_p16(a);
}


// CHECK-LABEL: define <8 x i8> @test_vrev64_s8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vrev64_s8(int8x8_t a) {
  return vrev64_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vrev64_s16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vrev64_s16(int16x4_t a) {
  return vrev64_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vrev64_s32(<2 x i32> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 0>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
int32x2_t test_vrev64_s32(int32x2_t a) {
  return vrev64_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vrev64_u8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vrev64_u8(uint8x8_t a) {
  return vrev64_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vrev64_u16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vrev64_u16(uint16x4_t a) {
  return vrev64_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vrev64_u32(<2 x i32> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 0>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
uint32x2_t test_vrev64_u32(uint32x2_t a) {
  return vrev64_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vrev64_p8(<8 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vrev64_p8(poly8x8_t a) {
  return vrev64_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vrev64_p16(<4 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vrev64_p16(poly16x4_t a) {
  return vrev64_p16(a);
}

// CHECK-LABEL: define <2 x float> @test_vrev64_f32(<2 x float> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %a, <2 x i32> <i32 1, i32 0>
// CHECK: ret <2 x float> [[SHUFFLE_I]]
float32x2_t test_vrev64_f32(float32x2_t a) {
  return vrev64_f32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev64q_s8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrev64q_s8(int8x16_t a) {
  return vrev64q_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vrev64q_s16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vrev64q_s16(int16x8_t a) {
  return vrev64q_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vrev64q_s32(<4 x i32> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vrev64q_s32(int32x4_t a) {
  return vrev64q_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev64q_u8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrev64q_u8(uint8x16_t a) {
  return vrev64q_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vrev64q_u16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vrev64q_u16(uint16x8_t a) {
  return vrev64q_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vrev64q_u32(<4 x i32> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vrev64q_u32(uint32x4_t a) {
  return vrev64q_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev64q_p8(<16 x i8> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vrev64q_p8(poly8x16_t a) {
  return vrev64q_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vrev64q_p16(<8 x i16> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
poly16x8_t test_vrev64q_p16(poly16x8_t a) {
  return vrev64q_p16(a);
}

// CHECK-LABEL: define <4 x float> @test_vrev64q_f32(<4 x float> %a) #0 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK: ret <4 x float> [[SHUFFLE_I]]
float32x4_t test_vrev64q_f32(float32x4_t a) {
  return vrev64q_f32(a);
}

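// vrhadd is the rounding halving add: each lane computes (a + b + 1) >> 1 in
// a widened intermediate so the carry is not lost, which is why it lowers to
// the dedicated @llvm.arm.neon.vrhadds/vrhaddu intrinsics rather than plain
// IR add and shift instructions.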
// CHECK-LABEL: define <8 x i8> @test_vrhadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VRHADD_V_I]]
int8x8_t test_vrhadd_s8(int8x8_t a, int8x8_t b) {
  return vrhadd_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vrhadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]]) #4
// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vrhadd_s16(int16x4_t a, int16x4_t b) {
  return vrhadd_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vrhadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]]) #4
// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vrhadd_s32(int32x2_t a, int32x2_t b) {
  return vrhadd_s32(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vrhadd_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VRHADD_V_I]]
uint8x8_t test_vrhadd_u8(uint8x8_t a, uint8x8_t b) {
  return vrhadd_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vrhadd_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]]) #4
// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vrhadd_u16(uint16x4_t a, uint16x4_t b) {
  return vrhadd_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vrhadd_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]]) #4
// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vrhadd_u32(uint32x2_t a, uint32x2_t b) {
  return vrhadd_u32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vrhaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
int8x16_t test_vrhaddq_s8(int8x16_t a, int8x16_t b) {
  return vrhaddq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vrhaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]]) #4
// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vrhaddq_s16(int16x8_t a, int16x8_t b) {
  return vrhaddq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vrhaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]]) #4
// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vrhaddq_s32(int32x4_t a, int32x4_t b) {
  return vrhaddq_s32(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vrhaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
uint8x16_t test_vrhaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vrhaddq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vrhaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]]) #4
// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vrhaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vrhaddq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vrhaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]]) #4
// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vrhaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vrhaddq_u32(a, b);
}

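// vrshl is the rounding shift by a per-lane signed amount: positive counts
// shift left, negative counts shift right with rounding. Note that the
// immediate rounding-shift tests further down reuse the same
// @llvm.arm.neon.vrshifts/vrshiftu intrinsics with a constant splat of -n as
// the shift operand.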
// CHECK-LABEL: define <8 x i8> @test_vrshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VRSHL_V_I]]
int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
  return vrshl_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vrshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[VRSHL_V_I]], <4 x i16> [[VRSHL_V1_I]]) #4
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
  return vrshl_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vrshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[VRSHL_V_I]], <2 x i32> [[VRSHL_V1_I]]) #4
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
  return vrshl_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vrshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[VRSHL_V_I]], <1 x i64> [[VRSHL_V1_I]]) #4
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
  return vrshl_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vrshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VRSHL_V_I]]
uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
  return vrshl_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vrshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[VRSHL_V_I]], <4 x i16> [[VRSHL_V1_I]]) #4
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
  return vrshl_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vrshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[VRSHL_V_I]], <2 x i32> [[VRSHL_V1_I]]) #4
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
  return vrshl_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vrshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[VRSHL_V_I]], <1 x i64> [[VRSHL_V1_I]]) #4
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
  return vrshl_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vrshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
  return vrshlq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vrshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[VRSHLQ_V_I]], <8 x i16> [[VRSHLQ_V1_I]]) #4
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
  return vrshlq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vrshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[VRSHLQ_V_I]], <4 x i32> [[VRSHLQ_V1_I]]) #4
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
  return vrshlq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vrshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[VRSHLQ_V_I]], <2 x i64> [[VRSHLQ_V1_I]]) #4
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
  return vrshlq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vrshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vrshlq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vrshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[VRSHLQ_V_I]], <8 x i16> [[VRSHLQ_V1_I]]) #4
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vrshlq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vrshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[VRSHLQ_V_I]], <4 x i32> [[VRSHLQ_V1_I]]) #4
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vrshlq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vrshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[VRSHLQ_V_I]], <2 x i64> [[VRSHLQ_V1_I]]) #4
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vrshlq_u64(a, b);
}

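// vrshrn_n is the rounding shift right by an immediate, narrowing to the
// half-width element type; the builtin encodes the immediate n as a splat of
// -n passed to @llvm.arm.neon.vrshiftn, as the n == 1 checks below show.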
// CHECK-LABEL: define <8 x i8> @test_vrshrn_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VRSHRN_N1]]
int8x8_t test_vrshrn_n_s16(int16x8_t a) {
  return vrshrn_n_s16(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vrshrn_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VRSHRN_N1]]
int16x4_t test_vrshrn_n_s32(int32x4_t a) {
  return vrshrn_n_s32(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vrshrn_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VRSHRN_N1]]
int32x2_t test_vrshrn_n_s64(int64x2_t a) {
  return vrshrn_n_s64(a, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vrshrn_n_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VRSHRN_N1]]
uint8x8_t test_vrshrn_n_u16(uint16x8_t a) {
  return vrshrn_n_u16(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vrshrn_n_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VRSHRN_N1]]
uint16x4_t test_vrshrn_n_u32(uint32x4_t a) {
  return vrshrn_n_u32(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vrshrn_n_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VRSHRN_N1]]
uint32x2_t test_vrshrn_n_u64(uint64x2_t a) {
  return vrshrn_n_u64(a, 1);
}


// CHECK-LABEL: define <8 x i8> @test_vrshr_n_s8(<8 x i8> %a) #0 {
// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %a, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <8 x i8> [[VRSHR_N]]
int8x8_t test_vrshr_n_s8(int8x8_t a) {
  return vrshr_n_s8(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vrshr_n_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <4 x i16> [[VRSHR_N1]]
int16x4_t test_vrshr_n_s16(int16x4_t a) {
  return vrshr_n_s16(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vrshr_n_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -1, i32 -1>)
// CHECK: ret <2 x i32> [[VRSHR_N1]]
int32x2_t test_vrshr_n_s32(int32x2_t a) {
  return vrshr_n_s32(a, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vrshr_n_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK: ret <1 x i64> [[VRSHR_N1]]
int64x1_t test_vrshr_n_s64(int64x1_t a) {
  return vrshr_n_s64(a, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vrshr_n_u8(<8 x i8> %a) #0 {
// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %a, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <8 x i8> [[VRSHR_N]]
uint8x8_t test_vrshr_n_u8(uint8x8_t a) {
  return vrshr_n_u8(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vrshr_n_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <4 x i16> [[VRSHR_N1]]
uint16x4_t test_vrshr_n_u16(uint16x4_t a) {
  return vrshr_n_u16(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vrshr_n_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -1, i32 -1>)
// CHECK: ret <2 x i32> [[VRSHR_N1]]
uint32x2_t test_vrshr_n_u32(uint32x2_t a) {
  return vrshr_n_u32(a, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vrshr_n_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK: ret <1 x i64> [[VRSHR_N1]]
uint64x1_t test_vrshr_n_u64(uint64x1_t a) {
  return vrshr_n_u64(a, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vrshrq_n_s8(<16 x i8> %a) #0 {
// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %a, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <16 x i8> [[VRSHR_N]]
int8x16_t test_vrshrq_n_s8(int8x16_t a) {
  return vrshrq_n_s8(a, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vrshrq_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i16> [[VRSHR_N1]]
int16x8_t test_vrshrq_n_s16(int16x8_t a) {
  return vrshrq_n_s16(a, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vrshrq_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i32> [[VRSHR_N1]]
int32x4_t test_vrshrq_n_s32(int32x4_t a) {
  return vrshrq_n_s32(a, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vrshrq_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i64> [[VRSHR_N1]]
int64x2_t test_vrshrq_n_s64(int64x2_t a) {
  return vrshrq_n_s64(a, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vrshrq_n_u8(<16 x i8> %a) #0 {
// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %a, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <16 x i8> [[VRSHR_N]]
uint8x16_t test_vrshrq_n_u8(uint8x16_t a) {
  return vrshrq_n_u8(a, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vrshrq_n_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i16> [[VRSHR_N1]]
uint16x8_t test_vrshrq_n_u16(uint16x8_t a) {
  return vrshrq_n_u16(a, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vrshrq_n_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i32> [[VRSHR_N1]]
uint32x4_t test_vrshrq_n_u32(uint32x4_t a) {
  return vrshrq_n_u32(a, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vrshrq_n_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i64> [[VRSHR_N1]]
uint64x2_t test_vrshrq_n_u64(uint64x2_t a) {
  return vrshrq_n_u64(a, 1);
}


// CHECK-LABEL: define <2 x float> @test_vrsqrte_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> [[VRSQRTE_V_I]]) #4
// CHECK: ret <2 x float> [[VRSQRTE_V1_I]]
float32x2_t test_vrsqrte_f32(float32x2_t a) {
  return vrsqrte_f32(a);
}

// CHECK-LABEL: define <2 x i32> @test_vrsqrte_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> [[VRSQRTE_V_I]]) #4
// CHECK: ret <2 x i32> [[VRSQRTE_V1_I]]
uint32x2_t test_vrsqrte_u32(uint32x2_t a) {
  return vrsqrte_u32(a);
}

// CHECK-LABEL: define <4 x float> @test_vrsqrteq_f32(<4 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VRSQRTEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> [[VRSQRTEQ_V_I]]) #4
// CHECK: ret <4 x float> [[VRSQRTEQ_V1_I]]
float32x4_t test_vrsqrteq_f32(float32x4_t a) {
  return vrsqrteq_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vrsqrteq_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSQRTEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> [[VRSQRTEQ_V_I]]) #4
// CHECK: ret <4 x i32> [[VRSQRTEQ_V1_I]]
uint32x4_t test_vrsqrteq_u32(uint32x4_t a) {
  return vrsqrteq_u32(a);
}


// CHECK-LABEL: define <2 x float> @test_vrsqrts_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VRSQRTS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VRSQRTS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> [[VRSQRTS_V_I]], <2 x float> [[VRSQRTS_V1_I]]) #4
// CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSQRTS_V3_I]] to <2 x float>
// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vrsqrts_f32(float32x2_t a, float32x2_t b) {
  return vrsqrts_f32(a, b);
}

// CHECK-LABEL: define <4 x float> @test_vrsqrtsq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VRSQRTSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VRSQRTSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> [[VRSQRTSQ_V_I]], <4 x float> [[VRSQRTSQ_V1_I]]) #4
// CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSQRTSQ_V3_I]] to <4 x float>
// CHECK: ret <4 x float> [[TMP2]]
float32x4_t test_vrsqrtsq_f32(float32x4_t a, float32x4_t b) {
  return vrsqrtsq_f32(a, b);
}


// CHECK-LABEL: define <8 x i8> @test_vrsra_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <8 x i8> %a, [[TMP0]]
// CHECK: ret <8 x i8> [[VRSRA_N]]
int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) {
  return vrsra_n_s8(a, b, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vrsra_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP4:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[TMP3]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <4 x i16> [[TMP2]], [[TMP4]]
// CHECK: ret <4 x i16> [[VRSRA_N]]
int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) {
  return vrsra_n_s16(a, b, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vrsra_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP4:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[TMP3]], <2 x i32> <i32 -1, i32 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]]
// CHECK: ret <2 x i32> [[VRSRA_N]]
int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) {
  return vrsra_n_s32(a, b, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vrsra_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[TMP3]], <1 x i64> <i64 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <1 x i64> [[TMP2]], [[TMP4]]
// CHECK: ret <1 x i64> [[VRSRA_N]]
int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) {
  return vrsra_n_s64(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vrsra_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <8 x i8> %a, [[TMP0]]
// CHECK: ret <8 x i8> [[VRSRA_N]]
uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) {
  return vrsra_n_u8(a, b, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vrsra_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP4:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[TMP3]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <4 x i16> [[TMP2]], [[TMP4]]
// CHECK: ret <4 x i16> [[VRSRA_N]]
uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) {
  return vrsra_n_u16(a, b, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vrsra_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP4:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[TMP3]], <2 x i32> <i32 -1, i32 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]]
// CHECK: ret <2 x i32> [[VRSRA_N]]
uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) {
  return vrsra_n_u32(a, b, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vrsra_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[TMP3]], <1 x i64> <i64 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <1 x i64> [[TMP2]], [[TMP4]]
// CHECK: ret <1 x i64> [[VRSRA_N]]
uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) {
  return vrsra_n_u64(a, b, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vrsraq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <16 x i8> %a, [[TMP0]]
// CHECK: ret <16 x i8> [[VRSRA_N]]
int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) {
  return vrsraq_n_s8(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vrsraq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>

// CHECK-LABEL: define <2 x i64> @test_vrsraq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[TMP3]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
// CHECK: ret <2 x i64> [[VRSRA_N]]
int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) {
  return vrsraq_n_s64(a, b, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vrsraq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <16 x i8> %a, [[TMP0]]
// CHECK: ret <16 x i8> [[VRSRA_N]]
uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vrsraq_n_u8(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vrsraq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP4:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[TMP3]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <8 x i16> [[TMP2]], [[TMP4]]
// CHECK: ret <8 x i16> [[VRSRA_N]]
uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vrsraq_n_u16(a, b, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vrsraq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP4:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[TMP3]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
// CHECK: ret <4 x i32> [[VRSRA_N]]
uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vrsraq_n_u32(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vrsraq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[TMP3]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
// CHECK: ret <2 x i64> [[VRSRA_N]]
uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vrsraq_n_u64(a, b, 1);
}

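// vrsubhn (rounding subtract and narrow): the subtraction, the rounding and
// the extraction of the high half of each element all happen inside the
// llvm.arm.neon.vrsubhn intrinsic, so the checks only see bitcasts around it.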
// CHECK-LABEL: define <8 x i8> @test_vrsubhn_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I]], <8 x i16> [[VRSUBHN_V1_I]]) #4
// CHECK: ret <8 x i8> [[VRSUBHN_V2_I]]
int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) {
  return vrsubhn_s16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vrsubhn_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I]], <4 x i32> [[VRSUBHN_V1_I]]) #4
// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) {
  return vrsubhn_s32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vrsubhn_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I]], <2 x i64> [[VRSUBHN_V1_I]]) #4
// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) {
  return vrsubhn_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vrsubhn_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I]], <8 x i16> [[VRSUBHN_V1_I]]) #4
// CHECK: ret <8 x i8> [[VRSUBHN_V2_I]]
uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vrsubhn_u16(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vrsubhn_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I]], <4 x i32> [[VRSUBHN_V1_I]]) #4
// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vrsubhn_u32(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vrsubhn_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I]], <2 x i64> [[VRSUBHN_V1_I]]) #4
// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vrsubhn_u64(a, b);
}

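// vset_lane lowers to a plain insertelement with a constant lane index. The
// f16 variants instead round-trip through memory, presumably because half is
// not a native arithmetic type for these targets.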
// CHECK-LABEL: define <8 x i8> @test_vset_lane_u8(i8 zeroext %a, <8 x i8> %b) #0 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK: ret <8 x i8> [[VSET_LANE]]
uint8x8_t test_vset_lane_u8(uint8_t a, uint8x8_t b) {
  return vset_lane_u8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vset_lane_u16(i16 zeroext %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VSET_LANE]]
uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) {
  return vset_lane_u16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vset_lane_u32(i32 %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a, i32 1
// CHECK: ret <2 x i32> [[VSET_LANE]]
uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) {
  return vset_lane_u32(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vset_lane_s8(i8 signext %a, <8 x i8> %b) #0 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK: ret <8 x i8> [[VSET_LANE]]
int8x8_t test_vset_lane_s8(int8_t a, int8x8_t b) {
  return vset_lane_s8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vset_lane_s16(i16 signext %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VSET_LANE]]
int16x4_t test_vset_lane_s16(int16_t a, int16x4_t b) {
  return vset_lane_s16(a, b, 3);
}

// CHECK-LABEL: define <2 x i32> @test_vset_lane_s32(i32 %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a, i32 1
// CHECK: ret <2 x i32> [[VSET_LANE]]
int32x2_t test_vset_lane_s32(int32_t a, int32x2_t b) {
  return vset_lane_s32(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vset_lane_p8(i8 signext %a, <8 x i8> %b) #0 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK: ret <8 x i8> [[VSET_LANE]]
poly8x8_t test_vset_lane_p8(poly8_t a, poly8x8_t b) {
  return vset_lane_p8(a, b, 7);
}

// CHECK-LABEL: define <4 x i16> @test_vset_lane_p16(i16 signext %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VSET_LANE]]
poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t b) {
  return vset_lane_p16(a, b, 3);
}

// CHECK-LABEL: define <2 x float> @test_vset_lane_f32(float %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x float> [[TMP1]], float %a, i32 1
// CHECK: ret <2 x float> [[VSET_LANE]]
float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) {
  return vset_lane_f32(a, b, 1);
}

// CHECK-LABEL: define <4 x half> @test_vset_lane_f16(half* %a, <4 x half> %b) #0 {
// CHECK: [[__REINT_246:%.*]] = alloca half, align 2
// CHECK: [[__REINT1_246:%.*]] = alloca <4 x half>, align 8
// CHECK: [[__REINT2_246:%.*]] = alloca <4 x i16>, align 8
// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK: store half [[TMP0]], half* [[__REINT_246]], align 2
// CHECK: store <4 x half> %b, <4 x half>* [[__REINT1_246]], align 8
// CHECK: [[TMP1:%.*]] = bitcast half* [[__REINT_246]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = bitcast <4 x half>* [[__REINT1_246]] to <4 x i16>*
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP6]], i16 [[TMP2]], i32 1
// CHECK: store <4 x i16> [[VSET_LANE]], <4 x i16>* [[__REINT2_246]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16>* [[__REINT2_246]] to <4 x half>*
// CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[TMP7]], align 8
// CHECK: ret <4 x half> [[TMP8]]
float16x4_t test_vset_lane_f16(float16_t *a, float16x4_t b) {
  return vset_lane_f16(*a, b, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_u8(i8 zeroext %a, <16 x i8> %b) #0 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK: ret <16 x i8> [[VSET_LANE]]
uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) {
  return vsetq_lane_u8(a, b, 15);
}

// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_u16(i16 zeroext %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VSET_LANE]]
uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) {
  return vsetq_lane_u16(a, b, 7);
}

// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_u32(i32 %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
// CHECK: ret <4 x i32> [[VSET_LANE]]
uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) {
  return vsetq_lane_u32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_s8(i8 signext %a, <16 x i8> %b) #0 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK: ret <16 x i8> [[VSET_LANE]]
int8x16_t test_vsetq_lane_s8(int8_t a, int8x16_t b) {
  return vsetq_lane_s8(a, b, 15);
}

// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_s16(i16 signext %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VSET_LANE]]
int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b) {
  return vsetq_lane_s16(a, b, 7);
}

// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_s32(i32 %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
// CHECK: ret <4 x i32> [[VSET_LANE]]
int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) {
  return vsetq_lane_s32(a, b, 3);
}

// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_p8(i8 signext %a, <16 x i8> %b) #0 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK: ret <16 x i8> [[VSET_LANE]]
poly8x16_t test_vsetq_lane_p8(poly8_t a, poly8x16_t b) {
  return vsetq_lane_p8(a, b, 15);
}

// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_p16(i16 signext %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VSET_LANE]]
poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t b) {
  return vsetq_lane_p16(a, b, 7);
}

// CHECK-LABEL: define <4 x float> @test_vsetq_lane_f32(float %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> [[TMP1]], float %a, i32 3
// CHECK: ret <4 x float> [[VSET_LANE]]
float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) {
  return vsetq_lane_f32(a, b, 3);
}

// CHECK-LABEL: define <8 x half> @test_vsetq_lane_f16(half* %a, <8 x half> %b) #0 {
// CHECK: [[__REINT_248:%.*]] = alloca half, align 2
// CHECK: [[__REINT1_248:%.*]] = alloca <8 x half>, align 16
// CHECK: [[__REINT2_248:%.*]] = alloca <8 x i16>, align 16
// CHECK: [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK: store half [[TMP0]], half* [[__REINT_248]], align 2
// CHECK: store <8 x half> %b, <8 x half>* [[__REINT1_248]], align 16
// CHECK: [[TMP1:%.*]] = bitcast half* [[__REINT_248]] to i16*
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = bitcast <8 x half>* [[__REINT1_248]] to <8 x i16>*
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[TMP2]], i32 3
// CHECK: store <8 x i16> [[VSET_LANE]], <8 x i16>* [[__REINT2_248]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16>* [[__REINT2_248]] to <8 x half>*
// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[TMP7]], align 16
// CHECK: ret <8 x half> [[TMP8]]
float16x8_t test_vsetq_lane_f16(float16_t *a, float16x8_t b) {
  return vsetq_lane_f16(*a, b, 3);
}

// The optimizer is able to get rid of all moves now.
// CHECK-LABEL: define <1 x i64> @test_vset_lane_s64(i64 %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0
// CHECK: ret <1 x i64> [[VSET_LANE]]
int64x1_t test_vset_lane_s64(int64_t a, int64x1_t b) {
  return vset_lane_s64(a, b, 0);
}

// The optimizer is able to get rid of all moves now.
// CHECK-LABEL: define <1 x i64> @test_vset_lane_u64(i64 %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0
// CHECK: ret <1 x i64> [[VSET_LANE]]
uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) {
  return vset_lane_u64(a, b, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_s64(i64 %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
// CHECK: ret <2 x i64> [[VSET_LANE]]
int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) {
  return vsetq_lane_s64(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_u64(i64 %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
// CHECK: ret <2 x i64> [[VSET_LANE]]
uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) {
  return vsetq_lane_u64(a, b, 1);
}

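// vshl takes a signed per-lane shift count (negative lanes shift right in the
// NEON semantics), which is presumably why it stays an intrinsic call
// (vshifts/vshiftu) instead of becoming a plain IR shl.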
// CHECK-LABEL: define <8 x i8> @test_vshl_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VSHL_V_I]]
int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) {
  return vshl_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vshl_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> [[VSHL_V_I]], <4 x i16> [[VSHL_V1_I]]) #4
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) {
  return vshl_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vshl_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> [[VSHL_V_I]], <2 x i32> [[VSHL_V1_I]]) #4
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) {
  return vshl_s32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vshl_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> [[VSHL_V_I]], <1 x i64> [[VSHL_V1_I]]) #4
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) {
  return vshl_s64(a, b);
}

// CHECK-LABEL: define <8 x i8> @test_vshl_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %a, <8 x i8> %b) #4
// CHECK: ret <8 x i8> [[VSHL_V_I]]
uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) {
  return vshl_u8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vshl_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> [[VSHL_V_I]], <4 x i16> [[VSHL_V1_I]]) #4
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) {
  return vshl_u16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vshl_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> [[VSHL_V_I]], <2 x i32> [[VSHL_V1_I]]) #4
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) {
  return vshl_u32(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vshl_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> [[VSHL_V_I]], <1 x i64> [[VSHL_V1_I]]) #4
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) {
  return vshl_u64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VSHLQ_V_I]]
int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) {
  return vshlq_s8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> [[VSHLQ_V_I]], <8 x i16> [[VSHLQ_V1_I]]) #4
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) {
  return vshlq_s16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> [[VSHLQ_V_I]], <4 x i32> [[VSHLQ_V1_I]]) #4
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) {
  return vshlq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> [[VSHLQ_V_I]], <2 x i64> [[VSHLQ_V1_I]]) #4
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) {
  return vshlq_s64(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %a, <16 x i8> %b) #4
// CHECK: ret <16 x i8> [[VSHLQ_V_I]]
uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) {
  return vshlq_u8(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> [[VSHLQ_V_I]], <8 x i16> [[VSHLQ_V1_I]]) #4
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) {
  return vshlq_u16(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> [[VSHLQ_V_I]], <4 x i32> [[VSHLQ_V1_I]]) #4
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) {
  return vshlq_u32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> [[VSHLQ_V_I]], <2 x i64> [[VSHLQ_V1_I]]) #4
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
  return vshlq_u64(a, b);
}

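// vshll_n (widening shift left by immediate) needs no intrinsic at all: as the
// checks below show, it is emitted directly as sext/zext followed by a vector
// shl with the immediate splatted across the lanes.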
// CHECK-LABEL: define <8 x i16> @test_vshll_n_s8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHLL_N]]
int16x8_t test_vshll_n_s8(int8x8_t a) {
  return vshll_n_s8(a, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vshll_n_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHLL_N]]
int32x4_t test_vshll_n_s16(int16x4_t a) {
  return vshll_n_s16(a, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vshll_n_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHLL_N]]
int64x2_t test_vshll_n_s32(int32x2_t a) {
  return vshll_n_s32(a, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vshll_n_u8(<8 x i8> %a) #0 {
// CHECK: [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHLL_N]]
uint16x8_t test_vshll_n_u8(uint8x8_t a) {
  return vshll_n_u8(a, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vshll_n_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHLL_N]]
uint32x4_t test_vshll_n_u16(uint16x4_t a) {
  return vshll_n_u16(a, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vshll_n_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHLL_N]]
uint64x2_t test_vshll_n_u32(uint32x2_t a) {
  return vshll_n_u32(a, 1);
}

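// vshl_n and vshlq_n (shift left by immediate) likewise lower to a plain IR
// shl with a splat constant shift amount.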
// CHECK-LABEL: define <8 x i8> @test_vshl_n_s8(<8 x i8> %a) #0 {
// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <8 x i8> [[VSHL_N]]
int8x8_t test_vshl_n_s8(int8x8_t a) {
  return vshl_n_s8(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vshl_n_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <4 x i16> [[VSHL_N]]
int16x4_t test_vshl_n_s16(int16x4_t a) {
  return vshl_n_s16(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vshl_n_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 1, i32 1>
// CHECK: ret <2 x i32> [[VSHL_N]]
int32x2_t test_vshl_n_s32(int32x2_t a) {
  return vshl_n_s32(a, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vshl_n_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHL_N]]
int64x1_t test_vshl_n_s64(int64x1_t a) {
  return vshl_n_s64(a, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vshl_n_u8(<8 x i8> %a) #0 {
// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <8 x i8> [[VSHL_N]]
uint8x8_t test_vshl_n_u8(uint8x8_t a) {
  return vshl_n_u8(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vshl_n_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <4 x i16> [[VSHL_N]]
uint16x4_t test_vshl_n_u16(uint16x4_t a) {
  return vshl_n_u16(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vshl_n_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 1, i32 1>
// CHECK: ret <2 x i32> [[VSHL_N]]
uint32x2_t test_vshl_n_u32(uint32x2_t a) {
  return vshl_n_u32(a, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vshl_n_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHL_N]]
uint64x1_t test_vshl_n_u64(uint64x1_t a) {
  return vshl_n_u64(a, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vshlq_n_s8(<16 x i8> %a) #0 {
// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <16 x i8> [[VSHL_N]]
int8x16_t test_vshlq_n_s8(int8x16_t a) {
  return vshlq_n_s8(a, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vshlq_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHL_N]]
int16x8_t test_vshlq_n_s16(int16x8_t a) {
  return vshlq_n_s16(a, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vshlq_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHL_N]]
int32x4_t test_vshlq_n_s32(int32x4_t a) {
  return vshlq_n_s32(a, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vshlq_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHL_N]]
int64x2_t test_vshlq_n_s64(int64x2_t a) {
  return vshlq_n_s64(a, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vshlq_n_u8(<16 x i8> %a) #0 {
// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <16 x i8> [[VSHL_N]]
uint8x16_t test_vshlq_n_u8(uint8x16_t a) {
  return vshlq_n_u8(a, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vshlq_n_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHL_N]]
uint16x8_t test_vshlq_n_u16(uint16x8_t a) {
  return vshlq_n_u16(a, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vshlq_n_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHL_N]]
uint32x4_t test_vshlq_n_u32(uint32x4_t a) {
  return vshlq_n_u32(a, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vshlq_n_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHL_N]]
uint64x2_t test_vshlq_n_u64(uint64x2_t a) {
  return vshlq_n_u64(a, 1);
}

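// vshrn_n (shift right by immediate and narrow) is emitted as ashr (signed)
// or lshr (unsigned) followed by a trunc to the half-width element type.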
// CHECK-LABEL: define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK: ret <8 x i8> [[VSHRN_N]]
int8x8_t test_vshrn_n_s16(int16x8_t a) {
  return vshrn_n_s16(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK: ret <4 x i16> [[VSHRN_N]]
int16x4_t test_vshrn_n_s32(int32x4_t a) {
  return vshrn_n_s32(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK: ret <2 x i32> [[VSHRN_N]]
int32x2_t test_vshrn_n_s64(int64x2_t a) {
  return vshrn_n_s64(a, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK: ret <8 x i8> [[VSHRN_N]]
uint8x8_t test_vshrn_n_u16(uint16x8_t a) {
  return vshrn_n_u16(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK: ret <4 x i16> [[VSHRN_N]]
uint16x4_t test_vshrn_n_u32(uint32x4_t a) {
  return vshrn_n_u32(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK: ret <2 x i32> [[VSHRN_N]]
uint32x2_t test_vshrn_n_u64(uint64x2_t a) {
  return vshrn_n_u64(a, 1);
}

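// vshr_n and vshrq_n (shift right by immediate) lower to a plain ashr for
// the signed variants and lshr for the unsigned ones.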
// CHECK-LABEL: define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) #0 {
// CHECK: [[VSHR_N:%.*]] = ashr <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <8 x i8> [[VSHR_N]]
int8x8_t test_vshr_n_s8(int8x8_t a) {
  return vshr_n_s8(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <4 x i16> [[VSHR_N]]
int16x4_t test_vshr_n_s16(int16x4_t a) {
  return vshr_n_s16(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], <i32 1, i32 1>
// CHECK: ret <2 x i32> [[VSHR_N]]
int32x2_t test_vshr_n_s32(int32x2_t a) {
  return vshr_n_s32(a, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vshr_n_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHR_N]]
int64x1_t test_vshr_n_s64(int64x1_t a) {
  return vshr_n_s64(a, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) #0 {
// CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <8 x i8> [[VSHR_N]]
uint8x8_t test_vshr_n_u8(uint8x8_t a) {
  return vshr_n_u8(a, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <4 x i16> [[VSHR_N]]
uint16x4_t test_vshr_n_u16(uint16x4_t a) {
  return vshr_n_u16(a, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], <i32 1, i32 1>
// CHECK: ret <2 x i32> [[VSHR_N]]
uint32x2_t test_vshr_n_u32(uint32x2_t a) {
  return vshr_n_u32(a, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vshr_n_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHR_N]]
uint64x1_t test_vshr_n_u64(uint64x1_t a) {
  return vshr_n_u64(a, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) #0 {
// CHECK: [[VSHR_N:%.*]] = ashr <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <16 x i8> [[VSHR_N]]
int8x16_t test_vshrq_n_s8(int8x16_t a) {
  return vshrq_n_s8(a, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHR_N]]
int16x8_t test_vshrq_n_s16(int16x8_t a) {
  return vshrq_n_s16(a, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHR_N]]
int32x4_t test_vshrq_n_s32(int32x4_t a) {
  return vshrq_n_s32(a, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHR_N]]
int64x2_t test_vshrq_n_s64(int64x2_t a) {
  return vshrq_n_s64(a, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) #0 {
// CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <16 x i8> [[VSHR_N]]
uint8x16_t test_vshrq_n_u8(uint8x16_t a) {
  return vshrq_n_u8(a, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHR_N]]
uint16x8_t test_vshrq_n_u16(uint16x8_t a) {
  return vshrq_n_u16(a, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHR_N]]
uint32x4_t test_vshrq_n_u32(uint32x4_t a) {
  return vshrq_n_u32(a, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHR_N]]
uint64x2_t test_vshrq_n_u64(uint64x2_t a) {
  return vshrq_n_u64(a, 1);
}

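// vsli_n (shift left and insert) keeps the vshiftins intrinsic, presumably
// because part of the destination register is preserved by the instruction;
// the immediate is again passed as a splat vector operand.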
// CHECK-LABEL: define <8 x i8> @test_vsli_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <8 x i8> [[VSLI_N]]
int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) {
  return vsli_n_s8(a, b, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vsli_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <4 x i16> [[VSLI_N2]]
int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) {
  return vsli_n_s16(a, b, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vsli_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 1, i32 1>)
// CHECK: ret <2 x i32> [[VSLI_N2]]
int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) {
  return vsli_n_s32(a, b, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vsli_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VSLI_N2]]
int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) {
  return vsli_n_s64(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vsli_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <8 x i8> [[VSLI_N]]
uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsli_n_u8(a, b, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vsli_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <4 x i16> [[VSLI_N2]]
uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsli_n_u16(a, b, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vsli_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 1, i32 1>)
// CHECK: ret <2 x i32> [[VSLI_N2]]
uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsli_n_u32(a, b, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vsli_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VSLI_N2]]
uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsli_n_u64(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vsli_n_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <8 x i8> [[VSLI_N]]
poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsli_n_p8(a, b, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vsli_n_p16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <4 x i16> [[VSLI_N2]]
poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsli_n_p16(a, b, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vsliq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <16 x i8> [[VSLI_N]]
int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) {
  return vsliq_n_s8(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vsliq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <8 x i16> [[VSLI_N2]]
int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) {
  return vsliq_n_s16(a, b, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vsliq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK: ret <4 x i32> [[VSLI_N2]]
int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) {
  return vsliq_n_s32(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vsliq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 1, i64 1>)
// CHECK: ret <2 x i64> [[VSLI_N2]]
int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) {
  return vsliq_n_s64(a, b, 1);
}
// CHECK-LABEL: define <16 x i8> @test_vsliq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <16 x i8> [[VSLI_N]]
int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) {
  return vsliq_n_s8(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vsliq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <8 x i16> [[VSLI_N2]]
int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) {
  return vsliq_n_s16(a, b, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vsliq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK: ret <4 x i32> [[VSLI_N2]]
int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) {
  return vsliq_n_s32(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vsliq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 1, i64 1>)
// CHECK: ret <2 x i64> [[VSLI_N2]]
int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) {
  return vsliq_n_s64(a, b, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vsliq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <16 x i8> [[VSLI_N]]
uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsliq_n_u8(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vsliq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <8 x i16> [[VSLI_N2]]
uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsliq_n_u16(a, b, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vsliq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK: ret <4 x i32> [[VSLI_N2]]
uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsliq_n_u32(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vsliq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 1, i64 1>)
// CHECK: ret <2 x i64> [[VSLI_N2]]
uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsliq_n_u64(a, b, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vsliq_n_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <16 x i8> [[VSLI_N]]
poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) {
  return vsliq_n_p8(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vsliq_n_p16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <8 x i16> [[VSLI_N2]]
poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsliq_n_p16(a, b, 1);
}
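

// vsra_n: shift right by an immediate and accumulate into the first operand.
// As the checks below show, this lowers to plain IR (ashr for signed
// elements, lshr for unsigned, followed by an add) rather than a NEON
// intrinsic call.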
// CHECK-LABEL: define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSRA_N:%.*]] = ashr <8 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) {
  return vsra_n_s8(a, b, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1>
// CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
// CHECK: ret <4 x i16> [[TMP4]]
int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) {
  return vsra_n_s16(a, b, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], <i32 1, i32 1>
// CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
// CHECK: ret <2 x i32> [[TMP4]]
int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) {
  return vsra_n_s32(a, b, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vsra_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], <i64 1>
// CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <1 x i64> [[TMP4]]
int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) {
  return vsra_n_s64(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSRA_N:%.*]] = lshr <8 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsra_n_u8(a, b, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1>
// CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
// CHECK: ret <4 x i16> [[TMP4]]
uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsra_n_u16(a, b, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], <i32 1, i32 1>
// CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
// CHECK: ret <2 x i32> [[TMP4]]
uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsra_n_u32(a, b, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vsra_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], <i64 1>
// CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <1 x i64> [[TMP4]]
uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsra_n_u64(a, b, 1);
}
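
// vsraq_n: the same shift-right-and-accumulate lowering at 128-bit vector
// types.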
// CHECK-LABEL: define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSRA_N:%.*]] = ashr <16 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) {
  return vsraq_n_s8(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
// CHECK: ret <8 x i16> [[TMP4]]
int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) {
  return vsraq_n_s16(a, b, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
// CHECK: ret <4 x i32> [[TMP4]]
int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) {
  return vsraq_n_s32(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], <i64 1, i64 1>
// CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <2 x i64> [[TMP4]]
int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) {
  return vsraq_n_s64(a, b, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSRA_N:%.*]] = lshr <16 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsraq_n_u8(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
// CHECK: ret <8 x i16> [[TMP4]]
uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsraq_n_u16(a, b, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
// CHECK: ret <4 x i32> [[TMP4]]
uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsraq_n_u32(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], <i64 1, i64 1>
// CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <2 x i64> [[TMP4]]
uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsraq_n_u64(a, b, 1);
}
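

// vsri_n: shift right and insert. These reuse the same
// @llvm.arm.neon.vshiftins intrinsic as vsli_n above; the right shift is
// encoded as a negative shift-amount vector (e.g. <i8 -1, ...>).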
// CHECK-LABEL: define <8 x i8> @test_vsri_n_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <8 x i8> [[VSLI_N]]
int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) {
  return vsri_n_s8(a, b, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vsri_n_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <4 x i16> [[VSLI_N2]]
int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) {
  return vsri_n_s16(a, b, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vsri_n_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 -1, i32 -1>)
// CHECK: ret <2 x i32> [[VSLI_N2]]
int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) {
  return vsri_n_s32(a, b, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vsri_n_s64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 -1>)
// CHECK: ret <1 x i64> [[VSLI_N2]]
int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) {
  return vsri_n_s64(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vsri_n_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <8 x i8> [[VSLI_N]]
uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsri_n_u8(a, b, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vsri_n_u16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <4 x i16> [[VSLI_N2]]
uint16x4_t test_vsri_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsri_n_u16(a, b, 1);
}

// CHECK-LABEL: define <2 x i32> @test_vsri_n_u32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 -1, i32 -1>)
// CHECK: ret <2 x i32> [[VSLI_N2]]
uint32x2_t test_vsri_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsri_n_u32(a, b, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vsri_n_u64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 -1>)
// CHECK: ret <1 x i64> [[VSLI_N2]]
uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsri_n_u64(a, b, 1);
}

// CHECK-LABEL: define <8 x i8> @test_vsri_n_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <8 x i8> [[VSLI_N]]
poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsri_n_p8(a, b, 1);
}

// CHECK-LABEL: define <4 x i16> @test_vsri_n_p16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <4 x i16> [[VSLI_N2]]
poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsri_n_p16(a, b, 1);
}
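
// vsriq_n: q-register shift-right-and-insert.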
// CHECK-LABEL: define <16 x i8> @test_vsriq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <16 x i8> [[VSLI_N]]
int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) {
  return vsriq_n_s8(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vsriq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i16> [[VSLI_N2]]
int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) {
  return vsriq_n_s16(a, b, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vsriq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i32> [[VSLI_N2]]
int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) {
  return vsriq_n_s32(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vsriq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i64> [[VSLI_N2]]
int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) {
  return vsriq_n_s64(a, b, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vsriq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <16 x i8> [[VSLI_N]]
uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsriq_n_u8(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vsriq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i16> [[VSLI_N2]]
uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsriq_n_u16(a, b, 1);
}

// CHECK-LABEL: define <4 x i32> @test_vsriq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i32> [[VSLI_N2]]
uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsriq_n_u32(a, b, 1);
}

// CHECK-LABEL: define <2 x i64> @test_vsriq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i64> [[VSLI_N2]]
uint64x2_t test_vsriq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsriq_n_u64(a, b, 1);
}

// CHECK-LABEL: define <16 x i8> @test_vsriq_n_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <16 x i8> [[VSLI_N]]
poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) {
  return vsriq_n_p8(a, b, 1);
}

// CHECK-LABEL: define <8 x i16> @test_vsriq_n_p16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i16> [[VSLI_N2]]
poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsriq_n_p16(a, b, 1);
}
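

// vst1q: store one full q register. The trailing i32 argument of
// @llvm.arm.neon.vst1 is the element alignment: 1 for i8, 2 for i16/f16,
// 4 for i32/f32. The 64-bit cases also pass 4, which presumably reflects
// the 4-byte alignment of 64-bit types under the apcs-gnu target ABI.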
// CHECK-LABEL: define void @test_vst1q_u8(i8* %a, <16 x i8> %b) #0 {
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %a, <16 x i8> %b, i32 1)
// CHECK: ret void
void test_vst1q_u8(uint8_t * a, uint8x16_t b) {
  vst1q_u8(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u16(i16* %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* [[TMP0]], <8 x i16> [[TMP2]], i32 2)
// CHECK: ret void
void test_vst1q_u16(uint16_t * a, uint16x8_t b) {
  vst1q_u16(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u32(i32* %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* [[TMP0]], <4 x i32> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1q_u32(uint32_t * a, uint32x4_t b) {
  vst1q_u32(a, b);
}

// CHECK-LABEL: define void @test_vst1q_u64(i64* %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* [[TMP0]], <2 x i64> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1q_u64(uint64_t * a, uint64x2_t b) {
  vst1q_u64(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s8(i8* %a, <16 x i8> %b) #0 {
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %a, <16 x i8> %b, i32 1)
// CHECK: ret void
void test_vst1q_s8(int8_t * a, int8x16_t b) {
  vst1q_s8(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s16(i16* %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* [[TMP0]], <8 x i16> [[TMP2]], i32 2)
// CHECK: ret void
void test_vst1q_s16(int16_t * a, int16x8_t b) {
  vst1q_s16(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s32(i32* %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* [[TMP0]], <4 x i32> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1q_s32(int32_t * a, int32x4_t b) {
  vst1q_s32(a, b);
}

// CHECK-LABEL: define void @test_vst1q_s64(i64* %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* [[TMP0]], <2 x i64> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1q_s64(int64_t * a, int64x2_t b) {
  vst1q_s64(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f16(half* %a, <8 x half> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* [[TMP0]], <8 x i16> [[TMP2]], i32 2)
// CHECK: ret void
void test_vst1q_f16(float16_t * a, float16x8_t b) {
  vst1q_f16(a, b);
}

// CHECK-LABEL: define void @test_vst1q_f32(float* %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* [[TMP0]], <4 x float> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1q_f32(float32_t * a, float32x4_t b) {
  vst1q_f32(a, b);
}

// CHECK-LABEL: define void @test_vst1q_p8(i8* %a, <16 x i8> %b) #0 {
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %a, <16 x i8> %b, i32 1)
// CHECK: ret void
void test_vst1q_p8(poly8_t * a, poly8x16_t b) {
  vst1q_p8(a, b);
}

// CHECK-LABEL: define void @test_vst1q_p16(i16* %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* [[TMP0]], <8 x i16> [[TMP2]], i32 2)
// CHECK: ret void
void test_vst1q_p16(poly16_t * a, poly16x8_t b) {
  vst1q_p16(a, b);
}
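
// vst1: the corresponding 64-bit (d-register) stores.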
// CHECK-LABEL: define void @test_vst1_u8(i8* %a, <8 x i8> %b) #0 {
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %a, <8 x i8> %b, i32 1)
// CHECK: ret void
void test_vst1_u8(uint8_t * a, uint8x8_t b) {
  vst1_u8(a, b);
}

// CHECK-LABEL: define void @test_vst1_u16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* [[TMP0]], <4 x i16> [[TMP2]], i32 2)
// CHECK: ret void
void test_vst1_u16(uint16_t * a, uint16x4_t b) {
  vst1_u16(a, b);
}

// CHECK-LABEL: define void @test_vst1_u32(i32* %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* [[TMP0]], <2 x i32> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1_u32(uint32_t * a, uint32x2_t b) {
  vst1_u32(a, b);
}

// CHECK-LABEL: define void @test_vst1_u64(i64* %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* [[TMP0]], <1 x i64> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1_u64(uint64_t * a, uint64x1_t b) {
  vst1_u64(a, b);
}

// CHECK-LABEL: define void @test_vst1_s8(i8* %a, <8 x i8> %b) #0 {
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %a, <8 x i8> %b, i32 1)
// CHECK: ret void
void test_vst1_s8(int8_t * a, int8x8_t b) {
  vst1_s8(a, b);
}

// CHECK-LABEL: define void @test_vst1_s16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* [[TMP0]], <4 x i16> [[TMP2]], i32 2)
// CHECK: ret void
void test_vst1_s16(int16_t * a, int16x4_t b) {
  vst1_s16(a, b);
}

// CHECK-LABEL: define void @test_vst1_s32(i32* %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* [[TMP0]], <2 x i32> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1_s32(int32_t * a, int32x2_t b) {
  vst1_s32(a, b);
}

// CHECK-LABEL: define void @test_vst1_s64(i64* %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* [[TMP0]], <1 x i64> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1_s64(int64_t * a, int64x1_t b) {
  vst1_s64(a, b);
}

// CHECK-LABEL: define void @test_vst1_f16(half* %a, <4 x half> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* [[TMP0]], <4 x i16> [[TMP2]], i32 2)
// CHECK: ret void
void test_vst1_f16(float16_t * a, float16x4_t b) {
  vst1_f16(a, b);
}

// CHECK-LABEL: define void @test_vst1_f32(float* %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v2f32(i8* [[TMP0]], <2 x float> [[TMP2]], i32 4)
// CHECK: ret void
void test_vst1_f32(float32_t * a, float32x2_t b) {
  vst1_f32(a, b);
}

// CHECK-LABEL: define void @test_vst1_p8(i8* %a, <8 x i8> %b) #0 {
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %a, <8 x i8> %b, i32 1)
// CHECK: ret void
void test_vst1_p8(poly8_t * a, poly8x8_t b) {
  vst1_p8(a, b);
}

// CHECK-LABEL: define void @test_vst1_p16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* [[TMP0]], <4 x i16> [[TMP2]], i32 2)
// CHECK: ret void
void test_vst1_p16(poly16_t * a, poly16x4_t b) {
  vst1_p16(a, b);
}
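

// vst1q_lane: store a single lane of a q register. Most element types lower
// to an extractelement plus an ordinary scalar store; the 64-bit variants
// instead shufflevector the requested lane into a <1 x i64> and pass that
// to @llvm.arm.neon.vst1.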
// CHECK-LABEL: define void @test_vst1q_lane_u8(i8* %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
// CHECK: store i8 [[TMP0]], i8* %a, align 1
// CHECK: ret void
void test_vst1q_lane_u8(uint8_t * a, uint8x16_t b) {
  vst1q_lane_u8(a, b, 15);
}

// CHECK-LABEL: define void @test_vst1q_lane_u16(i16* %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK: ret void
void test_vst1q_lane_u16(uint16_t * a, uint16x8_t b) {
  vst1q_lane_u16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst1q_lane_u32(i32* %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: store i32 [[TMP3]], i32* [[TMP4]], align 4
// CHECK: ret void
void test_vst1q_lane_u32(uint32_t * a, uint32x4_t b) {
  vst1q_lane_u32(a, b, 3);
}

// CHECK-LABEL: define void @test_vst1q_lane_u64(i64* %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> <i32 1>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* [[TMP0]], <1 x i64> [[TMP3]], i32 4)
// CHECK: ret void
void test_vst1q_lane_u64(uint64_t * a, uint64x2_t b) {
  vst1q_lane_u64(a, b, 1);
}

// CHECK-LABEL: define void @test_vst1q_lane_s8(i8* %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
// CHECK: store i8 [[TMP0]], i8* %a, align 1
// CHECK: ret void
void test_vst1q_lane_s8(int8_t * a, int8x16_t b) {
  vst1q_lane_s8(a, b, 15);
}

// CHECK-LABEL: define void @test_vst1q_lane_s16(i16* %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK: ret void
void test_vst1q_lane_s16(int16_t * a, int16x8_t b) {
  vst1q_lane_s16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst1q_lane_s32(i32* %a, <4 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: store i32 [[TMP3]], i32* [[TMP4]], align 4
// CHECK: ret void
void test_vst1q_lane_s32(int32_t * a, int32x4_t b) {
  vst1q_lane_s32(a, b, 3);
}

// CHECK-LABEL: define void @test_vst1q_lane_s64(i64* %a, <2 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> <i32 1>
// CHECK: call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* [[TMP0]], <1 x i64> [[TMP3]], i32 4)
// CHECK: ret void
void test_vst1q_lane_s64(int64_t * a, int64x2_t b) {
  vst1q_lane_s64(a, b, 1);
}

// CHECK-LABEL: define void @test_vst1q_lane_f16(half* %a, <8 x half> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK: ret void
void test_vst1q_lane_f16(float16_t * a, float16x8_t b) {
  vst1q_lane_f16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst1q_lane_f32(float* %a, <4 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK: store float [[TMP3]], float* [[TMP4]], align 4
// CHECK: ret void
void test_vst1q_lane_f32(float32_t * a, float32x4_t b) {
  vst1q_lane_f32(a, b, 3);
}

// CHECK-LABEL: define void @test_vst1q_lane_p8(i8* %a, <16 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
// CHECK: store i8 [[TMP0]], i8* %a, align 1
// CHECK: ret void
void test_vst1q_lane_p8(poly8_t * a, poly8x16_t b) {
  vst1q_lane_p8(a, b, 15);
}

// CHECK-LABEL: define void @test_vst1q_lane_p16(i16* %a, <8 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK: ret void
void test_vst1q_lane_p16(poly16_t * a, poly16x8_t b) {
  vst1q_lane_p16(a, b, 7);
}
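
// vst1_lane: single-lane stores from d registers.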
// CHECK-LABEL: define void @test_vst1_lane_u8(i8* %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
// CHECK: store i8 [[TMP0]], i8* %a, align 1
// CHECK: ret void
void test_vst1_lane_u8(uint8_t * a, uint8x8_t b) {
  vst1_lane_u8(a, b, 7);
}

// CHECK-LABEL: define void @test_vst1_lane_u16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK: ret void
void test_vst1_lane_u16(uint16_t * a, uint16x4_t b) {
  vst1_lane_u16(a, b, 3);
}

// CHECK-LABEL: define void @test_vst1_lane_u32(i32* %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: store i32 [[TMP3]], i32* [[TMP4]], align 4
// CHECK: ret void
void test_vst1_lane_u32(uint32_t * a, uint32x2_t b) {
  vst1_lane_u32(a, b, 1);
}

// CHECK-LABEL: define void @test_vst1_lane_u64(i64* %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK: store i64 [[TMP3]], i64* [[TMP4]], align 4
// CHECK: ret void
void test_vst1_lane_u64(uint64_t * a, uint64x1_t b) {
  vst1_lane_u64(a, b, 0);
}

// CHECK-LABEL: define void @test_vst1_lane_s8(i8* %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
// CHECK: store i8 [[TMP0]], i8* %a, align 1
// CHECK: ret void
void test_vst1_lane_s8(int8_t * a, int8x8_t b) {
  vst1_lane_s8(a, b, 7);
}

// CHECK-LABEL: define void @test_vst1_lane_s16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK: ret void
void test_vst1_lane_s16(int16_t * a, int16x4_t b) {
  vst1_lane_s16(a, b, 3);
}

// CHECK-LABEL: define void @test_vst1_lane_s32(i32* %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK: store i32 [[TMP3]], i32* [[TMP4]], align 4
// CHECK: ret void
void test_vst1_lane_s32(int32_t * a, int32x2_t b) {
  vst1_lane_s32(a, b, 1);
}

// CHECK-LABEL: define void @test_vst1_lane_s64(i64* %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK: store i64 [[TMP3]], i64* [[TMP4]], align 4
// CHECK: ret void
void test_vst1_lane_s64(int64_t * a, int64x1_t b) {
  vst1_lane_s64(a, b, 0);
}

// CHECK-LABEL: define void @test_vst1_lane_f16(half* %a, <4 x half> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK: ret void
void test_vst1_lane_f16(float16_t * a, float16x4_t b) {
  vst1_lane_f16(a, b, 3);
}

// CHECK-LABEL: define void @test_vst1_lane_f32(float* %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK: store float [[TMP3]], float* [[TMP4]], align 4
// CHECK: ret void
void test_vst1_lane_f32(float32_t * a, float32x2_t b) {
  vst1_lane_f32(a, b, 1);
}

// CHECK-LABEL: define void @test_vst1_lane_p8(i8* %a, <8 x i8> %b) #0 {
// CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
// CHECK: store i8 [[TMP0]], i8* %a, align 1
// CHECK: ret void
void test_vst1_lane_p8(poly8_t * a, poly8x8_t b) {
  vst1_lane_p8(a, b, 7);
}

// CHECK-LABEL: define void @test_vst1_lane_p16(i16* %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
// CHECK: [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK: store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK: ret void
void test_vst1_lane_p16(poly16_t * a, poly16x4_t b) {
  vst1_lane_p16(a, b, 3);
}
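

// vst2q: interleaved two-register stores. The vector-pair argument is
// coerced to [4 x i64] at the ABI boundary; the checks follow it through
// the alloca and memcpy of the struct before both vectors are loaded and
// passed to @llvm.arm.neon.vst2.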
// CHECK-LABEL: define void @test_vst2q_u8(i8* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <16 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1)
// CHECK: ret void
void test_vst2q_u8(uint8_t * a, uint8x16x2_t b) {
  vst2q_u8(a, b);
}

// CHECK-LABEL: define void @test_vst2q_u16(i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2)
// CHECK: ret void
void test_vst2q_u16(uint16_t * a, uint16x8x2_t b) {
  vst2q_u16(a, b);
}

// CHECK-LABEL: define void @test_vst2q_u32(i32* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 4)
// CHECK: ret void
void test_vst2q_u32(uint32_t * a, uint32x4x2_t b) {
  vst2q_u32(a, b);
}

// CHECK-LABEL: define void @test_vst2q_s8(i8* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <16 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1)
// CHECK: ret void
void test_vst2q_s8(int8_t * a, int8x16x2_t b) {
  vst2q_s8(a, b);
}

// CHECK-LABEL: define void @test_vst2q_s16(i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2)
// CHECK: ret void
void test_vst2q_s16(int16_t * a, int16x8x2_t b) {
  vst2q_s16(a, b);
}

// CHECK-LABEL: define void @test_vst2q_s32(i32* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 4)
// CHECK: ret void
void test_vst2q_s32(int32_t * a, int32x4x2_t b) {
  vst2q_s32(a, b);
}

// CHECK-LABEL: define void @test_vst2q_f16(half* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2)
// CHECK: ret void
void test_vst2q_f16(float16_t * a, float16x8x2_t b) {
  vst2q_f16(a, b);
}

// CHECK-LABEL: define void @test_vst2q_f32(float* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP8]], <4 x float> [[TMP9]], i32 4)
// CHECK: ret void
void test_vst2q_f32(float32_t * a, float32x4x2_t b) {
  vst2q_f32(a, b);
}

// CHECK-LABEL: define void @test_vst2q_p8(i8* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <16 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1)
// CHECK: ret void
void test_vst2q_p8(poly8_t * a, poly8x16x2_t b) {
  vst2q_p8(a, b);
}

// CHECK-LABEL: define void @test_vst2q_p16(i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: call void
@llvm.arm.neon.vst2.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2) 18675 // CHECK: ret void 18676 void test_vst2q_p16(poly16_t * a, poly16x8x2_t b) { 18677 vst2q_p16(a, b); 18678 } 18679 18680 // CHECK-LABEL: define void @test_vst2_u8(i8* %a, [2 x i64] %b.coerce) #0 { 18681 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8 18682 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8 18683 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0 18684 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]* 18685 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 18686 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8* 18687 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8* 18688 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 18689 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 18690 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0 18691 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 18692 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 18693 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1 18694 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 18695 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 1) 18696 // CHECK: ret void 18697 void test_vst2_u8(uint8_t * a, uint8x8x2_t b) { 18698 vst2_u8(a, b); 18699 } 18700 18701 // CHECK-LABEL: define void @test_vst2_u16(i16* %a, [2 x i64] %b.coerce) #0 { 18702 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8 18703 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8 18704 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0 18705 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]* 18706 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 18707 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8* 18708 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8* 18709 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 18710 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 18711 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0 18712 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0 18713 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 18714 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 18715 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0 18716 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1 18717 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 18718 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 18719 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 18720 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 18721 // CHECK: call 
void @llvm.arm.neon.vst2.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2) 18722 // CHECK: ret void 18723 void test_vst2_u16(uint16_t * a, uint16x4x2_t b) { 18724 vst2_u16(a, b); 18725 } 18726 18727 // CHECK-LABEL: define void @test_vst2_u32(i32* %a, [2 x i64] %b.coerce) #0 { 18728 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8 18729 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8 18730 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0 18731 // CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]* 18732 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 18733 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8* 18734 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8* 18735 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 18736 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 18737 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0 18738 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0 18739 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 18740 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8> 18741 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0 18742 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1 18743 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 18744 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8> 18745 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32> 18746 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> 18747 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 4) 18748 // CHECK: ret void 18749 void test_vst2_u32(uint32_t * a, uint32x2x2_t b) { 18750 vst2_u32(a, b); 18751 } 18752 18753 // CHECK-LABEL: define void @test_vst2_u64(i64* %a, [2 x i64] %b.coerce) #0 { 18754 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8 18755 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8 18756 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0 18757 // CHECK: [[TMP0:%.*]] = bitcast [2 x <1 x i64>]* [[COERCE_DIVE]] to [2 x i64]* 18758 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 18759 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8* 18760 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8* 18761 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 18762 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* 18763 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0 18764 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i32 0, i32 0 18765 // CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 18766 // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> 18767 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0 18768 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds 
[2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i32 0, i32 1 18769 // CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 18770 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> 18771 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> 18772 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64> 18773 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i32 4) 18774 // CHECK: ret void 18775 void test_vst2_u64(uint64_t * a, uint64x1x2_t b) { 18776 vst2_u64(a, b); 18777 } 18778 18779 // CHECK-LABEL: define void @test_vst2_s8(i8* %a, [2 x i64] %b.coerce) #0 { 18780 // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8 18781 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8 18782 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0 18783 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]* 18784 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 18785 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8* 18786 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8* 18787 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 18788 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 18789 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0 18790 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 18791 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 18792 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1 18793 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 18794 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 1) 18795 // CHECK: ret void 18796 void test_vst2_s8(int8_t * a, int8x8x2_t b) { 18797 vst2_s8(a, b); 18798 } 18799 18800 // CHECK-LABEL: define void @test_vst2_s16(i16* %a, [2 x i64] %b.coerce) #0 { 18801 // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8 18802 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8 18803 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0 18804 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]* 18805 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 18806 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8* 18807 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8* 18808 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 18809 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 18810 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0 18811 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0 18812 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 18813 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 18814 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0 18815 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 
x <4 x i16>]* [[VAL1]], i32 0, i32 1 18816 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 18817 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 18818 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 18819 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 18820 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2) 18821 // CHECK: ret void 18822 void test_vst2_s16(int16_t * a, int16x4x2_t b) { 18823 vst2_s16(a, b); 18824 } 18825 18826 // CHECK-LABEL: define void @test_vst2_s32(i32* %a, [2 x i64] %b.coerce) #0 { 18827 // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8 18828 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8 18829 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0 18830 // CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]* 18831 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 18832 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8* 18833 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8* 18834 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 18835 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 18836 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0 18837 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0 18838 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 18839 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8> 18840 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0 18841 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1 18842 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 18843 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8> 18844 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32> 18845 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> 18846 // CHECK: call void @llvm.arm.neon.vst2.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 4) 18847 // CHECK: ret void 18848 void test_vst2_s32(int32_t * a, int32x2x2_t b) { 18849 vst2_s32(a, b); 18850 } 18851 18852 // CHECK-LABEL: define void @test_vst2_s64(i64* %a, [2 x i64] %b.coerce) #0 { 18853 // CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8 18854 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8 18855 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0 18856 // CHECK: [[TMP0:%.*]] = bitcast [2 x <1 x i64>]* [[COERCE_DIVE]] to [2 x i64]* 18857 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 18858 // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8* 18859 // CHECK: [[TMP2:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8* 18860 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 18861 // CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8* 18862 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0 18863 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i32 0, i32 0 
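// Note for the f16 tests below: pairs of <4 x half> are bitcast to <4 x i16>
// before the store, so the IR-level call uses the v4i16 vst2 intrinsic, as
// the checks show. Illustrative round trip (not part of the FileCheck
// patterns; assumes 'buf' points to at least 8 valid float16_t values):
//   float16x4x2_t v = vld2_f16(buf);
//   vst2_f16(buf, v);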
// CHECK-LABEL: define void @test_vst2_f16(half* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x half>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2)
// CHECK: ret void
void test_vst2_f16(float16_t * a, float16x4x2_t b) {
  vst2_f16(a, b);
}

// CHECK-LABEL: define void @test_vst2_f32(float* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x float>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP8]], <2 x float> [[TMP9]], i32 4)
// CHECK: ret void
void test_vst2_f32(float32_t * a, float32x2x2_t b) {
  vst2_f32(a, b);
}

// CHECK-LABEL: define void @test_vst2_p8(i8* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 1)
// CHECK: ret void
void test_vst2_p8(poly8_t * a, poly8x8x2_t b) {
  vst2_p8(a, b);
}

// CHECK-LABEL: define void @test_vst2_p16(i16* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2)
// CHECK: ret void
void test_vst2_p16(poly16_t * a, poly16x4x2_t b) {
  vst2_p16(a, b);
}

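// The *_lane variants below store only the lane selected by the constant
// final argument from each half of the pair, so vst2q_lane_u16(a, b, 7)
// writes exactly two uint16_t values. Illustrative usage (not part of the
// FileCheck patterns; assumes 'src' and 'dst' are valid uint16_t buffers):
//   uint16x8x2_t v = vld2q_u16(src);
//   vst2q_lane_u16(dst, v, 7);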
// CHECK-LABEL: define void @test_vst2q_lane_u16(i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2)
// CHECK: ret void
void test_vst2q_lane_u16(uint16_t * a, uint16x8x2_t b) {
  vst2q_lane_u16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst2q_lane_u32(i32* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 3, i32 4)
// CHECK: ret void
void test_vst2q_lane_u32(uint32_t * a, uint32x4x2_t b) {
  vst2q_lane_u32(a, b, 3);
}

// CHECK-LABEL: define void @test_vst2q_lane_s16(i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2)
// CHECK: ret void
void test_vst2q_lane_s16(int16_t * a, int16x8x2_t b) {
  vst2q_lane_s16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst2q_lane_s32(i32* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 3, i32 4)
// CHECK: ret void
void test_vst2q_lane_s32(int32_t * a, int32x4x2_t b) {
  vst2q_lane_s32(a, b, 3);
}

// CHECK-LABEL: define void @test_vst2q_lane_f16(half* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2)
// CHECK: ret void
void test_vst2q_lane_f16(float16_t * a, float16x8x2_t b) {
  vst2q_lane_f16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst2q_lane_f32(float* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP8]], <4 x float> [[TMP9]], i32 3, i32 4)
// CHECK: ret void
void test_vst2q_lane_f32(float32_t * a, float32x4x2_t b) {
  vst2q_lane_f32(a, b, 3);
}

// CHECK-LABEL: define void @test_vst2q_lane_p16(i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2)
// CHECK: ret void
void test_vst2q_lane_p16(poly16_t * a, poly16x8x2_t b) {
  vst2q_lane_p16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst2_lane_u8(i8* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 7, i32 1)
// CHECK: ret void
void test_vst2_lane_u8(uint8_t * a, uint8x8x2_t b) {
  vst2_lane_u8(a, b, 7);
}

// CHECK-LABEL: define void @test_vst2_lane_u16(i16* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
// CHECK: ret void
void test_vst2_lane_u16(uint16_t * a, uint16x4x2_t b) {
  vst2_lane_u16(a, b, 3);
}

// CHECK-LABEL: define void @test_vst2_lane_u32(i32* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 1, i32 4)
// CHECK: ret void
void test_vst2_lane_u32(uint32_t * a, uint32x2x2_t b) {
  vst2_lane_u32(a, b, 1);
}

// CHECK-LABEL: define void @test_vst2_lane_s8(i8* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 7, i32 1)
// CHECK: ret void
void test_vst2_lane_s8(int8_t * a, int8x8x2_t b) {
  vst2_lane_s8(a, b, 7);
}

// CHECK-LABEL: define void @test_vst2_lane_s16(i16* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
// CHECK: ret void
void test_vst2_lane_s16(int16_t * a, int16x4x2_t b) {
  vst2_lane_s16(a, b, 3);
}

// CHECK-LABEL: define void @test_vst2_lane_s32(i32* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 1, i32 4)
// CHECK: ret void
void test_vst2_lane_s32(int32_t * a, int32x2x2_t b) {
  vst2_lane_s32(a, b, 1);
}

// CHECK-LABEL: define void @test_vst2_lane_f16(half* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x half>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
// CHECK: ret void
void test_vst2_lane_f16(float16_t * a, float16x4x2_t b) {
  vst2_lane_f16(a, b, 3);
}

// CHECK-LABEL: define void @test_vst2_lane_f32(float* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x float>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP8]], <2 x float> [[TMP9]], i32 1, i32 4)
// CHECK: ret void
void test_vst2_lane_f32(float32_t * a, float32x2x2_t b) {
  vst2_lane_f32(a, b, 1);
}

// CHECK-LABEL: define void @test_vst2_lane_p8(i8* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 7, i32 1)
// CHECK: ret void
void test_vst2_lane_p8(poly8_t * a, poly8x8x2_t b) {
  vst2_lane_p8(a, b, 7);
}

// CHECK-LABEL: define void @test_vst2_lane_p16(i16* %a, [2 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
// CHECK: ret void
void test_vst2_lane_p16(poly16_t * a, poly16x4x2_t b) {
  vst2_lane_p16(a, b, 3);
}

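// The vst3 tests that follow mirror the vst2 pattern with a third register:
// the struct argument is coerced as [6 x i64] and copied with a 48-byte
// memcpy before the three-way interleaved store. Illustrative usage (not
// part of the FileCheck patterns; assumes 'src' and 'dst' are valid
// uint8_t buffers):
//   uint8x16x3_t v = vld3q_u8(src);
//   vst3q_u8(dst, v);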
[[VAL1]], i32 0, i32 1 19372 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 19373 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 7, i32 1) 19374 // CHECK: ret void 19375 void test_vst2_lane_p8(poly8_t * a, poly8x8x2_t b) { 19376 vst2_lane_p8(a, b, 7); 19377 } 19378 19379 // CHECK-LABEL: define void @test_vst2_lane_p16(i16* %a, [2 x i64] %b.coerce) #0 { 19380 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8 19381 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8 19382 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0 19383 // CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]* 19384 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 19385 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8* 19386 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8* 19387 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 16, i32 8, i1 false) 19388 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 19389 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0 19390 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0 19391 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 19392 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 19393 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0 19394 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1 19395 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 19396 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 19397 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 19398 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 19399 // CHECK: call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2) 19400 // CHECK: ret void 19401 void test_vst2_lane_p16(poly16_t * a, poly16x4x2_t b) { 19402 vst2_lane_p16(a, b, 3); 19403 } 19404 19405 19406 // CHECK-LABEL: define void @test_vst3q_u8(i8* %a, [6 x i64] %b.coerce) #0 { 19407 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16 19408 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16 19409 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0 19410 // CHECK: [[TMP0:%.*]] = bitcast [3 x <16 x i8>]* [[COERCE_DIVE]] to [6 x i64]* 19411 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 19412 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8* 19413 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8* 19414 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) 19415 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 19416 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0 19417 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 19418 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 19419 // 
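// A reader's note on the pattern checked above (comment only; FileCheck
// ignores it). Each vst2_lane test has the same shape: the x2 aggregate
// arrives coerced to [2 x i64], is spilled to the alloca for %b, memcpy'd
// into the implicit __s1 copy made by the arm_neon.h wrapper, and the two
// fields are reloaded and handed to the intrinsic. In the
// @llvm.arm.neon.vst2lane calls, the last two i32 operands are the lane
// index from the C call and the pointer alignment in bytes, which tracks
// the element width (1 for i8, 2 for i16/f16, 4 for i32/f32). A minimal
// sketch for a hypothetical i16 case, following the checks above:
//
//   vst2_lane_s16(p, v, 3);
//   // ==> call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* %p,
//   //       <4 x i16> %v0, <4 x i16> %v1, i32 3, i32 2)
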
// CHECK-LABEL: define void @test_vst3q_u8(i8* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <16 x i8>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
// CHECK: ret void
void test_vst3q_u8(uint8_t * a, uint8x16x3_t b) {
  vst3q_u8(a, b);
}

// CHECK-LABEL: define void @test_vst3q_u16(i16* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
// CHECK: ret void
void test_vst3q_u16(uint16_t * a, uint16x8x3_t b) {
  vst3q_u16(a, b);
}

// CHECK-LABEL: define void @test_vst3q_u32(i32* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 4)
// CHECK: ret void
void test_vst3q_u32(uint32_t * a, uint32x4x3_t b) {
  vst3q_u32(a, b);
}

// CHECK-LABEL: define void @test_vst3q_s8(i8* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <16 x i8>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
// CHECK: ret void
void test_vst3q_s8(int8_t * a, int8x16x3_t b) {
  vst3q_s8(a, b);
}

// CHECK-LABEL: define void @test_vst3q_s16(i16* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
// CHECK: ret void
void test_vst3q_s16(int16_t * a, int16x8x3_t b) {
  vst3q_s16(a, b);
}

// CHECK-LABEL: define void @test_vst3q_s32(i32* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 4)
// CHECK: ret void
void test_vst3q_s32(int32_t * a, int32x4x3_t b) {
  vst3q_s32(a, b);
}

// CHECK-LABEL: define void @test_vst3q_f16(half* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
// CHECK: ret void
void test_vst3q_f16(float16_t * a, float16x8x3_t b) {
  vst3q_f16(a, b);
}

// CHECK-LABEL: define void @test_vst3q_f32(float* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], <4 x float> [[TMP12]], i32 4)
// CHECK: ret void
void test_vst3q_f32(float32_t * a, float32x4x3_t b) {
  vst3q_f32(a, b);
}

// CHECK-LABEL: define void @test_vst3q_p8(i8* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <16 x i8>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
// CHECK: ret void
void test_vst3q_p8(poly8_t * a, poly8x16x3_t b) {
  vst3q_p8(a, b);
}

// CHECK-LABEL: define void @test_vst3q_p16(i16* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
// CHECK: ret void
void test_vst3q_p16(poly16_t * a, poly16x8x3_t b) {
  vst3q_p16(a, b);
}

// CHECK-LABEL: define void @test_vst3_u8(i8* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 1)
// CHECK: ret void
void test_vst3_u8(uint8_t * a, uint8x8x3_t b) {
  vst3_u8(a, b);
}

// CHECK-LABEL: define void @test_vst3_u16(i16* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2)
// CHECK: ret void
void test_vst3_u16(uint16_t * a, uint16x4x3_t b) {
  vst3_u16(a, b);
}

// CHECK-LABEL: define void @test_vst3_u32(i32* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 4)
// CHECK: ret void
void test_vst3_u32(uint32_t * a, uint32x2x3_t b) {
  vst3_u32(a, b);
}

// CHECK-LABEL: define void @test_vst3_u64(i64* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <1 x i64>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i32 4)
// CHECK: ret void
void test_vst3_u64(uint64_t * a, uint64x1x3_t b) {
  vst3_u64(a, b);
}

// CHECK-LABEL: define void @test_vst3_s8(i8* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 1)
// CHECK: ret void
void test_vst3_s8(int8_t * a, int8x8x3_t b) {
  vst3_s8(a, b);
}

// CHECK-LABEL: define void @test_vst3_s16(i16* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2)
// CHECK: ret void
void test_vst3_s16(int16_t * a, int16x4x3_t b) {
  vst3_s16(a, b);
}

// CHECK-LABEL: define void @test_vst3_s32(i32* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 4)
// CHECK: ret void
void test_vst3_s32(int32_t * a, int32x2x3_t b) {
  vst3_s32(a, b);
}

// CHECK-LABEL: define void @test_vst3_s64(i64* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <1 x i64>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i32 4)
// CHECK: ret void
void test_vst3_s64(int64_t * a, int64x1x3_t b) {
  vst3_s64(a, b);
}

// CHECK-LABEL: define void @test_vst3_f16(half* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x half>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2)
// CHECK: ret void
void test_vst3_f16(float16_t * a, float16x4x3_t b) {
  vst3_f16(a, b);
}

// CHECK-LABEL: define void @test_vst3_f32(float* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x float>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], <2 x float> [[TMP12]], i32 4)
// CHECK: ret void
void test_vst3_f32(float32_t * a, float32x2x3_t b) {
  vst3_f32(a, b);
}

// CHECK-LABEL: define void @test_vst3_p8(i8* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 1)
// CHECK: ret void
void test_vst3_p8(poly8_t * a, poly8x8x3_t b) {
  vst3_p8(a, b);
}

// CHECK-LABEL: define void @test_vst3_p16(i16* %a, [3 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2)
// CHECK: ret void
void test_vst3_p16(poly16_t * a, poly16x4x3_t b) {
  vst3_p16(a, b);
}

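// The vst3/vst3q tests above and the vst3q_lane tests that follow use the
// same coercion scheme: x3 aggregates of 64-bit vectors travel as [3 x i64]
// (hence the memcpy of 24 bytes at align 8), x3 aggregates of 128-bit
// vectors as [6 x i64] (48 bytes at align 16). The emitted calls differ only
// in the operand list: @llvm.arm.neon.vst3 takes the three data registers
// plus a trailing i32 alignment, while @llvm.arm.neon.vst3lane inserts the
// i32 lane index before the alignment. A sketch of the two shapes
// (illustrative operands, not part of the checked output):
//
//   call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* %p, <8 x i16> %v0,
//     <8 x i16> %v1, <8 x i16> %v2, i32 2)               ; whole registers
//   call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* %p, <8 x i16> %v0,
//     <8 x i16> %v1, <8 x i16> %v2, i32 7, i32 2)        ; one lane of each
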
// CHECK-LABEL: define void @test_vst3q_lane_u16(i16* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2)
// CHECK: ret void
void test_vst3q_lane_u16(uint16_t * a, uint16x8x3_t b) {
  vst3q_lane_u16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst3q_lane_u32(i32* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 3, i32 4)
// CHECK: ret void
void test_vst3q_lane_u32(uint32_t * a, uint32x4x3_t b) {
  vst3q_lane_u32(a, b, 3);
}

// CHECK-LABEL: define void @test_vst3q_lane_s16(i16* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2)
// CHECK: ret void
void test_vst3q_lane_s16(int16_t * a, int16x8x3_t b) {
  vst3q_lane_s16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst3q_lane_s32(i32* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 3, i32 4)
// CHECK: ret void
void test_vst3q_lane_s32(int32_t * a, int32x4x3_t b) {
  vst3q_lane_s32(a, b, 3);
}

// CHECK-LABEL: define void @test_vst3q_lane_f16(half* %a, [6 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]]
= bitcast %struct.float16x8x3_t* [[B]] to i8* 20179 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) 20180 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* 20181 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 20182 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0 20183 // CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 20184 // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> 20185 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 20186 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i32 0, i32 1 20187 // CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 20188 // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8> 20189 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 20190 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2 20191 // CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16 20192 // CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8> 20193 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 20194 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 20195 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> 20196 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2) 20197 // CHECK: ret void 20198 void test_vst3q_lane_f16(float16_t * a, float16x8x3_t b) { 20199 vst3q_lane_f16(a, b, 7); 20200 } 20201 20202 // CHECK-LABEL: define void @test_vst3q_lane_f32(float* %a, [6 x i64] %b.coerce) #0 { 20203 // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16 20204 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16 20205 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0 20206 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]* 20207 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 20208 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8* 20209 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8* 20210 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) 20211 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* 20212 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 20213 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0 20214 // CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 20215 // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8> 20216 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 20217 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i32 0, i32 1 20218 // CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 20219 // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8> 20220 // CHECK: 
[[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 20221 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i32 0, i32 2 20222 // CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16 20223 // CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8> 20224 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float> 20225 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float> 20226 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float> 20227 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], <4 x float> [[TMP12]], i32 3, i32 4) 20228 // CHECK: ret void 20229 void test_vst3q_lane_f32(float32_t * a, float32x4x3_t b) { 20230 vst3q_lane_f32(a, b, 3); 20231 } 20232 20233 // CHECK-LABEL: define void @test_vst3q_lane_p16(i16* %a, [6 x i64] %b.coerce) #0 { 20234 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16 20235 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16 20236 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0 20237 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]* 20238 // CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16 20239 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8* 20240 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8* 20241 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 48, i32 16, i1 false) 20242 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 20243 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 20244 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0 20245 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 20246 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 20247 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 20248 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1 20249 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 20250 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 20251 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 20252 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2 20253 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 20254 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> 20255 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 20256 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 20257 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> 20258 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2) 20259 // CHECK: ret void 20260 void test_vst3q_lane_p16(poly16_t * a, poly16x8x3_t b) { 20261 vst3q_lane_p16(a, b, 7); 20262 } 20263 20264 // CHECK-LABEL: define void @test_vst3_lane_u8(i8* %a, [3 x i64] %b.coerce) #0 { 20265 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8 20266 // 
CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8 20267 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0 20268 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]* 20269 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 20270 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8* 20271 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8* 20272 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 20273 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 20274 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0 20275 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 20276 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 20277 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1 20278 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 20279 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 20280 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2 20281 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 20282 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1) 20283 // CHECK: ret void 20284 void test_vst3_lane_u8(uint8_t * a, uint8x8x3_t b) { 20285 vst3_lane_u8(a, b, 7); 20286 } 20287 20288 // CHECK-LABEL: define void @test_vst3_lane_u16(i16* %a, [3 x i64] %b.coerce) #0 { 20289 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8 20290 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8 20291 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0 20292 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]* 20293 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 20294 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8* 20295 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8* 20296 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 20297 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 20298 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 20299 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0 20300 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 20301 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 20302 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 20303 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1 20304 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 20305 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 20306 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 20307 // CHECK: [[ARRAYIDX4:%.*]] = 
getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2 20308 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 20309 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8> 20310 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 20311 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 20312 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 20313 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2) 20314 // CHECK: ret void 20315 void test_vst3_lane_u16(uint16_t * a, uint16x4x3_t b) { 20316 vst3_lane_u16(a, b, 3); 20317 } 20318 20319 // CHECK-LABEL: define void @test_vst3_lane_u32(i32* %a, [3 x i64] %b.coerce) #0 { 20320 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8 20321 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8 20322 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0 20323 // CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]* 20324 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 20325 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8* 20326 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8* 20327 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 20328 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 20329 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 20330 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0 20331 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 20332 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8> 20333 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 20334 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1 20335 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 20336 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8> 20337 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 20338 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2 20339 // CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 20340 // CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8> 20341 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32> 20342 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> 20343 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32> 20344 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 1, i32 4) 20345 // CHECK: ret void 20346 void test_vst3_lane_u32(uint32_t * a, uint32x2x3_t b) { 20347 vst3_lane_u32(a, b, 1); 20348 } 20349 20350 // CHECK-LABEL: define void @test_vst3_lane_s8(i8* %a, [3 x i64] %b.coerce) #0 { 20351 // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8 20352 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8 20353 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0 20354 // CHECK: 
[[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]* 20355 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 20356 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8* 20357 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8* 20358 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 20359 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 20360 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0 20361 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 20362 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 20363 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1 20364 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 20365 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 20366 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2 20367 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 20368 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1) 20369 // CHECK: ret void 20370 void test_vst3_lane_s8(int8_t * a, int8x8x3_t b) { 20371 vst3_lane_s8(a, b, 7); 20372 } 20373 20374 // CHECK-LABEL: define void @test_vst3_lane_s16(i16* %a, [3 x i64] %b.coerce) #0 { 20375 // CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8 20376 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8 20377 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0 20378 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]* 20379 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 20380 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8* 20381 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8* 20382 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 20383 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 20384 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 20385 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0 20386 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 20387 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 20388 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 20389 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1 20390 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 20391 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 20392 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 20393 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2 20394 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 20395 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8> 
20396 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 20397 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 20398 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 20399 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2) 20400 // CHECK: ret void 20401 void test_vst3_lane_s16(int16_t * a, int16x4x3_t b) { 20402 vst3_lane_s16(a, b, 3); 20403 } 20404 20405 // CHECK-LABEL: define void @test_vst3_lane_s32(i32* %a, [3 x i64] %b.coerce) #0 { 20406 // CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8 20407 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8 20408 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0 20409 // CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]* 20410 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 20411 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8* 20412 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8* 20413 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 20414 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 20415 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 20416 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0 20417 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 20418 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8> 20419 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 20420 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1 20421 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 20422 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8> 20423 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 20424 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2 20425 // CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 20426 // CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8> 20427 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32> 20428 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> 20429 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32> 20430 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 1, i32 4) 20431 // CHECK: ret void 20432 void test_vst3_lane_s32(int32_t * a, int32x2x3_t b) { 20433 vst3_lane_s32(a, b, 1); 20434 } 20435 20436 // CHECK-LABEL: define void @test_vst3_lane_f16(half* %a, [3 x i64] %b.coerce) #0 { 20437 // CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8 20438 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8 20439 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0 20440 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x half>]* [[COERCE_DIVE]] to [3 x i64]* 20441 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 20442 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8* 20443 
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8* 20444 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 20445 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* 20446 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 20447 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i32 0, i32 0 20448 // CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8 20449 // CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8> 20450 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 20451 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i32 0, i32 1 20452 // CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 20453 // CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8> 20454 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 20455 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i32 0, i32 2 20456 // CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8 20457 // CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8> 20458 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 20459 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 20460 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 20461 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2) 20462 // CHECK: ret void 20463 void test_vst3_lane_f16(float16_t * a, float16x4x3_t b) { 20464 vst3_lane_f16(a, b, 3); 20465 } 20466 20467 // CHECK-LABEL: define void @test_vst3_lane_f32(float* %a, [3 x i64] %b.coerce) #0 { 20468 // CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8 20469 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8 20470 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0 20471 // CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x float>]* [[COERCE_DIVE]] to [3 x i64]* 20472 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 20473 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8* 20474 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8* 20475 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 20476 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* 20477 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 20478 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i32 0, i32 0 20479 // CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 20480 // CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8> 20481 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 20482 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i32 0, i32 1 20483 // CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 20484 // CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8> 20485 // CHECK: 
[[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 20486 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i32 0, i32 2 20487 // CHECK: [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8 20488 // CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8> 20489 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float> 20490 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float> 20491 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float> 20492 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], <2 x float> [[TMP12]], i32 1, i32 4) 20493 // CHECK: ret void 20494 void test_vst3_lane_f32(float32_t * a, float32x2x3_t b) { 20495 vst3_lane_f32(a, b, 1); 20496 } 20497 20498 // CHECK-LABEL: define void @test_vst3_lane_p8(i8* %a, [3 x i64] %b.coerce) #0 { 20499 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8 20500 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8 20501 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0 20502 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]* 20503 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 20504 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8* 20505 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8* 20506 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 20507 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 20508 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0 20509 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 20510 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 20511 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1 20512 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 20513 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 20514 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2 20515 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 20516 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1) 20517 // CHECK: ret void 20518 void test_vst3_lane_p8(poly8_t * a, poly8x8x3_t b) { 20519 vst3_lane_p8(a, b, 7); 20520 } 20521 20522 // CHECK-LABEL: define void @test_vst3_lane_p16(i16* %a, [3 x i64] %b.coerce) #0 { 20523 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8 20524 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8 20525 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0 20526 // CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]* 20527 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 20528 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8* 20529 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8* 20530 // CHECK: call void 
@llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 24, i32 8, i1 false) 20531 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 20532 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 20533 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0 20534 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 20535 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 20536 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 20537 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1 20538 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 20539 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 20540 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 20541 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2 20542 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 20543 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8> 20544 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 20545 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 20546 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 20547 // CHECK: call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2) 20548 // CHECK: ret void 20549 void test_vst3_lane_p16(poly16_t * a, poly16x4x3_t b) { 20550 vst3_lane_p16(a, b, 3); 20551 } 20552 20553 20554 // CHECK-LABEL: define void @test_vst4q_u8(i8* %a, [8 x i64] %b.coerce) #0 { 20555 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16 20556 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16 20557 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0 20558 // CHECK: [[TMP0:%.*]] = bitcast [4 x <16 x i8>]* [[COERCE_DIVE]] to [8 x i64]* 20559 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 20560 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8* 20561 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8* 20562 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) 20563 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 20564 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0 20565 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 20566 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 20567 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i32 0, i32 1 20568 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 20569 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 20570 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i32 0, i32 2 20571 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 20572 // CHECK: [[VAL5:%.*]] = 
getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 20573 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i32 0, i32 3 20574 // CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16 20575 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1) 20576 // CHECK: ret void 20577 void test_vst4q_u8(uint8_t * a, uint8x16x4_t b) { 20578 vst4q_u8(a, b); 20579 } 20580 20581 // CHECK-LABEL: define void @test_vst4q_u16(i16* %a, [8 x i64] %b.coerce) #0 { 20582 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16 20583 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16 20584 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0 20585 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]* 20586 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 20587 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8* 20588 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8* 20589 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) 20590 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 20591 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 20592 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0 20593 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 20594 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 20595 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 20596 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1 20597 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 20598 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 20599 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 20600 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2 20601 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 20602 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> 20603 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 20604 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3 20605 // CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 20606 // CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8> 20607 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 20608 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 20609 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> 20610 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16> 20611 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2) 20612 // CHECK: ret void 20613 void test_vst4q_u16(uint16_t * a, uint16x8x4_t b) { 20614 vst4q_u16(a, b); 20615 } 20616 20617 // CHECK-LABEL: define void 
@test_vst4q_u32(i32* %a, [8 x i64] %b.coerce) #0 { 20618 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16 20619 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16 20620 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0 20621 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]* 20622 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 20623 // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8* 20624 // CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8* 20625 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) 20626 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 20627 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 20628 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0 20629 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 20630 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> 20631 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 20632 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1 20633 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 20634 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> 20635 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 20636 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2 20637 // CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 20638 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8> 20639 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 20640 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3 20641 // CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16 20642 // CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8> 20643 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> 20644 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> 20645 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32> 20646 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32> 20647 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 4) 20648 // CHECK: ret void 20649 void test_vst4q_u32(uint32_t * a, uint32x4x4_t b) { 20650 vst4q_u32(a, b); 20651 } 20652 20653 // CHECK-LABEL: define void @test_vst4q_s8(i8* %a, [8 x i64] %b.coerce) #0 { 20654 // CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16 20655 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16 20656 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0 20657 // CHECK: [[TMP0:%.*]] = bitcast [4 x <16 x i8>]* [[COERCE_DIVE]] to [8 x i64]* 20658 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 20659 // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8* 20660 // CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* 
[[B]] to i8* 20661 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) 20662 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 20663 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0 20664 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 20665 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 20666 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i32 0, i32 1 20667 // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 20668 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 20669 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i32 0, i32 2 20670 // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 20671 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 20672 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i32 0, i32 3 20673 // CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16 20674 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1) 20675 // CHECK: ret void 20676 void test_vst4q_s8(int8_t * a, int8x16x4_t b) { 20677 vst4q_s8(a, b); 20678 } 20679 20680 // CHECK-LABEL: define void @test_vst4q_s16(i16* %a, [8 x i64] %b.coerce) #0 { 20681 // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16 20682 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16 20683 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0 20684 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]* 20685 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 20686 // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8* 20687 // CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8* 20688 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) 20689 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 20690 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 20691 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0 20692 // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 20693 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 20694 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 20695 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1 20696 // CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 20697 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 20698 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 20699 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2 20700 // CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 
20701 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> 20702 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 20703 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3 20704 // CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 20705 // CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8> 20706 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 20707 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 20708 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> 20709 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16> 20710 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2) 20711 // CHECK: ret void 20712 void test_vst4q_s16(int16_t * a, int16x8x4_t b) { 20713 vst4q_s16(a, b); 20714 } 20715 20716 // CHECK-LABEL: define void @test_vst4q_s32(i32* %a, [8 x i64] %b.coerce) #0 { 20717 // CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16 20718 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16 20719 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0 20720 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]* 20721 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 20722 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8* 20723 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8* 20724 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) 20725 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 20726 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 20727 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0 20728 // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 20729 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> 20730 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 20731 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1 20732 // CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 20733 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> 20734 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 20735 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2 20736 // CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 20737 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8> 20738 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 20739 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3 20740 // CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16 20741 // CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8> 20742 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> 20743 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> 20744 // CHECK: 
[[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32> 20745 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32> 20746 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 4) 20747 // CHECK: ret void 20748 void test_vst4q_s32(int32_t * a, int32x4x4_t b) { 20749 vst4q_s32(a, b); 20750 } 20751 20752 // CHECK-LABEL: define void @test_vst4q_f16(half* %a, [8 x i64] %b.coerce) #0 { 20753 // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16 20754 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16 20755 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0 20756 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]* 20757 // CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16 20758 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8* 20759 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8* 20760 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false) 20761 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* 20762 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 20763 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0 20764 // CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 20765 // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> 20766 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 20767 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i32 0, i32 1 20768 // CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 20769 // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8> 20770 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 20771 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i32 0, i32 2 20772 // CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16 20773 // CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8> 20774 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 20775 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3 20776 // CHECK: [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16 20777 // CHECK: [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8> 20778 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 20779 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 20780 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> 20781 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16> 20782 // CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2) 20783 // CHECK: ret void 20784 void test_vst4q_f16(float16_t * a, float16x8x4_t b) { 20785 vst4q_f16(a, b); 20786 } 20787 20788 // CHECK-LABEL: define void @test_vst4q_f32(float* %a, [8 x i64] %b.coerce) #0 { 20789 // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16 
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <4 x float> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x float>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x float> [[TMP15]], i32 4)
// CHECK: ret void
void test_vst4q_f32(float32_t * a, float32x4x4_t b) {
  vst4q_f32(a, b);
}

// CHECK-LABEL: define void @test_vst4q_p8(i8* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <16 x i8>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1)
// CHECK: ret void
void test_vst4q_p8(poly8_t * a, poly8x16x4_t b) {
  vst4q_p8(a, b);
}

// CHECK-LABEL: define void @test_vst4q_p16(i16* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2)
// CHECK: ret void
void test_vst4q_p16(poly16_t * a, poly16x8x4_t b) {
  vst4q_p16(a, b);
}

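// The vst4 d-register variants below store four 64-bit vectors with 4-way
// interleaving (element 0 of each vector, then element 1, and so on). A
// minimal usage sketch, not part of the CHECKed tests and with hypothetical
// names src/dst:
//   uint8x8x4_t v = vld4_u8(src); // deinterleave 32 bytes into 4 vectors
//   vst4_u8(dst, v);              // reinterleave and store them back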
// CHECK-LABEL: define void @test_vst4_u8(i8* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 1)
// CHECK: ret void
void test_vst4_u8(uint8_t * a, uint8x8x4_t b) {
  vst4_u8(a, b);
}

// CHECK-LABEL: define void @test_vst4_u16(i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2)
// CHECK: ret void
void test_vst4_u16(uint16_t * a, uint16x4x4_t b) {
  vst4_u16(a, b);
}

// CHECK-LABEL: define void @test_vst4_u32(i32* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 4)
// CHECK: ret void
void test_vst4_u32(uint32_t * a, uint32x2x4_t b) {
  vst4_u32(a, b);
}

// CHECK-LABEL: define void @test_vst4_u64(i64* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <1 x i64>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i32 4)
// CHECK: ret void
void test_vst4_u64(uint64_t * a, uint64x1x4_t b) {
  vst4_u64(a, b);
}

// CHECK-LABEL: define void @test_vst4_s8(i8* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 1)
// CHECK: ret void
void test_vst4_s8(int8_t * a, int8x8x4_t b) {
  vst4_s8(a, b);
}

// CHECK-LABEL: define void @test_vst4_s16(i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2)
// CHECK: ret void
void test_vst4_s16(int16_t * a, int16x4x4_t b) {
  vst4_s16(a, b);
}

// CHECK-LABEL: define void @test_vst4_s32(i32* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 4)
// CHECK: ret void
void test_vst4_s32(int32_t * a, int32x2x4_t b) {
  vst4_s32(a, b);
}

// CHECK-LABEL: define void @test_vst4_s64(i64* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <1 x i64>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i64* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i32 4)
// CHECK: ret void
void test_vst4_s64(int64_t * a, int64x1x4_t b) {
  vst4_s64(a, b);
}

// CHECK-LABEL: define void @test_vst4_f16(half* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x half>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <4 x half> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2)
// CHECK: ret void
void test_vst4_f16(float16_t * a, float16x4x4_t b) {
  vst4_f16(a, b);
}

// CHECK-LABEL: define void @test_vst4_f32(float* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x float>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <2 x float> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], <2 x float> [[TMP15]], i32 4)
// CHECK: ret void
void test_vst4_f32(float32_t * a, float32x2x4_t b) {
  vst4_f32(a, b);
}

// CHECK-LABEL: define void @test_vst4_p8(i8* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 1)
// CHECK: ret void
void test_vst4_p8(poly8_t * a, poly8x8x4_t b) {
  vst4_p8(a, b);
}

// CHECK-LABEL: define void @test_vst4_p16(i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2)
// CHECK: ret void
void test_vst4_p16(poly16_t * a, poly16x4x4_t b) {
  vst4_p16(a, b);
}


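// The vst4q_lane variants below store one lane from each of the four
// q-register vectors as four consecutive elements; in the IR, the two
// trailing i32 operands of @llvm.arm.neon.vst4lane are the lane index and
// the element alignment. A minimal usage sketch, not part of the CHECKed
// tests and with hypothetical names src/dst:
//   uint16x8x4_t v = vld4q_u16(src);
//   vst4q_lane_u16(dst, v, 7); // writes lane 7 of each vector (4 x u16)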
// CHECK-LABEL: define void @test_vst4q_lane_u16(i16* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2)
// CHECK: ret void
void test_vst4q_lane_u16(uint16_t * a, uint16x8x4_t b) {
  vst4q_lane_u16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst4q_lane_u32(i32* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 3, i32 4)
// CHECK: ret void
void test_vst4q_lane_u32(uint32_t * a, uint32x4x4_t b) {
  vst4q_lane_u32(a, b, 3);
}

// CHECK-LABEL: define void @test_vst4q_lane_s16(i16* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2)
// CHECK: ret void
void test_vst4q_lane_s16(int16_t * a, int16x8x4_t b) {
  vst4q_lane_s16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst4q_lane_s32(i32* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 3, i32 4)
// CHECK: ret void
void test_vst4q_lane_s32(int32_t * a, int32x4x4_t b) {
  vst4q_lane_s32(a, b, 3);
}

// CHECK-LABEL: define void @test_vst4q_lane_f16(half* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2)
// CHECK: ret void
void test_vst4q_lane_f16(float16_t * a, float16x8x4_t b) {
  vst4q_lane_f16(a, b, 7);
}

// CHECK-LABEL: define void @test_vst4q_lane_f32(float* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <4 x float> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x float>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x float> [[TMP15]], i32 3, i32 4)
// CHECK: ret void
void test_vst4q_lane_f32(float32_t * a, float32x4x4_t b) {
  vst4q_lane_f32(a, b, 3);
}

// CHECK-LABEL: define void @test_vst4q_lane_p16(i16* %a, [8 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 64, i32 16, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2)
// CHECK: ret void
void test_vst4q_lane_p16(poly16_t * a, poly16x8x4_t b) {
  vst4q_lane_p16(a, b, 7);
}

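// The d-register vst4_lane variants below mirror the q-register ones on
// 64-bit vectors. A minimal sketch, not part of the CHECKed tests and with
// hypothetical names src/dst:
//   uint32x2x4_t v = vld4_u32(src);
//   vst4_lane_u32(dst, v, 1); // stores lane 1 of each vector (4 x u32)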
// CHECK-LABEL: define void @test_vst4_lane_u8(i8* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
// CHECK: ret void
void test_vst4_lane_u8(uint8_t * a, uint8x8x4_t b) {
  vst4_lane_u8(a, b, 7);
}

// CHECK-LABEL: define void @test_vst4_lane_u16(i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2)
// CHECK: ret void
void test_vst4_lane_u16(uint16_t * a, uint16x4x4_t b) {
  vst4_lane_u16(a, b, 3);
}

// CHECK-LABEL: define void @test_vst4_lane_u32(i32* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 1, i32 4)
// CHECK: ret void
void test_vst4_lane_u32(uint32_t * a, uint32x2x4_t b) {
  vst4_lane_u32(a, b, 1);
}

// CHECK-LABEL: define void @test_vst4_lane_s8(i8* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
// CHECK: ret void
void test_vst4_lane_s8(int8_t * a, int8x8x4_t b) {
  vst4_lane_s8(a, b, 7);
}

// CHECK-LABEL: define void @test_vst4_lane_s16(i16* %a, [4 x i64] %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
// CHECK: [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false)
// CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32
2 21691 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 21692 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8> 21693 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 21694 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3 21695 // CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 21696 // CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8> 21697 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 21698 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 21699 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 21700 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16> 21701 // CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2) 21702 // CHECK: ret void 21703 void test_vst4_lane_s16(int16_t * a, int16x4x4_t b) { 21704 vst4_lane_s16(a, b, 3); 21705 } 21706 21707 // CHECK-LABEL: define void @test_vst4_lane_s32(i32* %a, [4 x i64] %b.coerce) #0 { 21708 // CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8 21709 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8 21710 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0 21711 // CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]* 21712 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 21713 // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8* 21714 // CHECK: [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8* 21715 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 21716 // CHECK: [[TMP3:%.*]] = bitcast i32* %a to i8* 21717 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 21718 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0 21719 // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 21720 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8> 21721 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 21722 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1 21723 // CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 21724 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8> 21725 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 21726 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2 21727 // CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 21728 // CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8> 21729 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 21730 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3 21731 // CHECK: [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8 21732 // CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8> 21733 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x 
i32> 21734 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> 21735 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32> 21736 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32> 21737 // CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 1, i32 4) 21738 // CHECK: ret void 21739 void test_vst4_lane_s32(int32_t * a, int32x2x4_t b) { 21740 vst4_lane_s32(a, b, 1); 21741 } 21742 21743 // CHECK-LABEL: define void @test_vst4_lane_f16(half* %a, [4 x i64] %b.coerce) #0 { 21744 // CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8 21745 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8 21746 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0 21747 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x half>]* [[COERCE_DIVE]] to [4 x i64]* 21748 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 21749 // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8* 21750 // CHECK: [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8* 21751 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 21752 // CHECK: [[TMP3:%.*]] = bitcast half* %a to i8* 21753 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 21754 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i32 0, i32 0 21755 // CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8 21756 // CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8> 21757 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 21758 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i32 0, i32 1 21759 // CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 21760 // CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8> 21761 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 21762 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i32 0, i32 2 21763 // CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8 21764 // CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8> 21765 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 21766 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i32 0, i32 3 21767 // CHECK: [[TMP10:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8 21768 // CHECK: [[TMP11:%.*]] = bitcast <4 x half> [[TMP10]] to <8 x i8> 21769 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 21770 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 21771 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 21772 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16> 21773 // CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2) 21774 // CHECK: ret void 21775 void test_vst4_lane_f16(float16_t * a, float16x4x4_t b) { 21776 vst4_lane_f16(a, b, 3); 21777 } 21778 21779 // CHECK-LABEL: define void 
@test_vst4_lane_f32(float* %a, [4 x i64] %b.coerce) #0 { 21780 // CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8 21781 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8 21782 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0 21783 // CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x float>]* [[COERCE_DIVE]] to [4 x i64]* 21784 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 21785 // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8* 21786 // CHECK: [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8* 21787 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 21788 // CHECK: [[TMP3:%.*]] = bitcast float* %a to i8* 21789 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 21790 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i32 0, i32 0 21791 // CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 21792 // CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8> 21793 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 21794 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i32 0, i32 1 21795 // CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 21796 // CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8> 21797 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 21798 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i32 0, i32 2 21799 // CHECK: [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8 21800 // CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8> 21801 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 21802 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i32 0, i32 3 21803 // CHECK: [[TMP10:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8 21804 // CHECK: [[TMP11:%.*]] = bitcast <2 x float> [[TMP10]] to <8 x i8> 21805 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float> 21806 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float> 21807 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float> 21808 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float> 21809 // CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], <2 x float> [[TMP15]], i32 1, i32 4) 21810 // CHECK: ret void 21811 void test_vst4_lane_f32(float32_t * a, float32x2x4_t b) { 21812 vst4_lane_f32(a, b, 1); 21813 } 21814 21815 // CHECK-LABEL: define void @test_vst4_lane_p8(i8* %a, [4 x i64] %b.coerce) #0 { 21816 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8 21817 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8 21818 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0 21819 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]* 21820 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 21821 // CHECK: [[TMP1:%.*]] = bitcast 
%struct.poly8x8x4_t* [[__S1]] to i8* 21822 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8* 21823 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 21824 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 21825 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0 21826 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 21827 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 21828 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1 21829 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 21830 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 21831 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2 21832 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 21833 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 21834 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3 21835 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 21836 // CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1) 21837 // CHECK: ret void 21838 void test_vst4_lane_p8(poly8_t * a, poly8x8x4_t b) { 21839 vst4_lane_p8(a, b, 7); 21840 } 21841 21842 // CHECK-LABEL: define void @test_vst4_lane_p16(i16* %a, [4 x i64] %b.coerce) #0 { 21843 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8 21844 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8 21845 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0 21846 // CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]* 21847 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 21848 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8* 21849 // CHECK: [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8* 21850 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP1]], i8* [[TMP2]], i32 32, i32 8, i1 false) 21851 // CHECK: [[TMP3:%.*]] = bitcast i16* %a to i8* 21852 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 21853 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0 21854 // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 21855 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 21856 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 21857 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1 21858 // CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 21859 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 21860 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 21861 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* 
[[VAL3]], i32 0, i32 2 21862 // CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 21863 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8> 21864 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 21865 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3 21866 // CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 21867 // CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8> 21868 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 21869 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 21870 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 21871 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16> 21872 // CHECK: call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2) 21873 // CHECK: ret void 21874 void test_vst4_lane_p16(poly16_t * a, poly16x4x4_t b) { 21875 vst4_lane_p16(a, b, 3); 21876 } 21877 21878 21879 // CHECK-LABEL: define <8 x i8> @test_vsub_s8(<8 x i8> %a, <8 x i8> %b) #0 { 21880 // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, %b 21881 // CHECK: ret <8 x i8> [[SUB_I]] 21882 int8x8_t test_vsub_s8(int8x8_t a, int8x8_t b) { 21883 return vsub_s8(a, b); 21884 } 21885 21886 // CHECK-LABEL: define <4 x i16> @test_vsub_s16(<4 x i16> %a, <4 x i16> %b) #0 { 21887 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, %b 21888 // CHECK: ret <4 x i16> [[SUB_I]] 21889 int16x4_t test_vsub_s16(int16x4_t a, int16x4_t b) { 21890 return vsub_s16(a, b); 21891 } 21892 21893 // CHECK-LABEL: define <2 x i32> @test_vsub_s32(<2 x i32> %a, <2 x i32> %b) #0 { 21894 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, %b 21895 // CHECK: ret <2 x i32> [[SUB_I]] 21896 int32x2_t test_vsub_s32(int32x2_t a, int32x2_t b) { 21897 return vsub_s32(a, b); 21898 } 21899 21900 // CHECK-LABEL: define <1 x i64> @test_vsub_s64(<1 x i64> %a, <1 x i64> %b) #0 { 21901 // CHECK: [[SUB_I:%.*]] = sub <1 x i64> %a, %b 21902 // CHECK: ret <1 x i64> [[SUB_I]] 21903 int64x1_t test_vsub_s64(int64x1_t a, int64x1_t b) { 21904 return vsub_s64(a, b); 21905 } 21906 21907 // CHECK-LABEL: define <2 x float> @test_vsub_f32(<2 x float> %a, <2 x float> %b) #0 { 21908 // CHECK: [[SUB_I:%.*]] = fsub <2 x float> %a, %b 21909 // CHECK: ret <2 x float> [[SUB_I]] 21910 float32x2_t test_vsub_f32(float32x2_t a, float32x2_t b) { 21911 return vsub_f32(a, b); 21912 } 21913 21914 // CHECK-LABEL: define <8 x i8> @test_vsub_u8(<8 x i8> %a, <8 x i8> %b) #0 { 21915 // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, %b 21916 // CHECK: ret <8 x i8> [[SUB_I]] 21917 uint8x8_t test_vsub_u8(uint8x8_t a, uint8x8_t b) { 21918 return vsub_u8(a, b); 21919 } 21920 21921 // CHECK-LABEL: define <4 x i16> @test_vsub_u16(<4 x i16> %a, <4 x i16> %b) #0 { 21922 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, %b 21923 // CHECK: ret <4 x i16> [[SUB_I]] 21924 uint16x4_t test_vsub_u16(uint16x4_t a, uint16x4_t b) { 21925 return vsub_u16(a, b); 21926 } 21927 21928 // CHECK-LABEL: define <2 x i32> @test_vsub_u32(<2 x i32> %a, <2 x i32> %b) #0 { 21929 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, %b 21930 // CHECK: ret <2 x i32> [[SUB_I]] 21931 uint32x2_t test_vsub_u32(uint32x2_t a, uint32x2_t b) { 21932 return vsub_u32(a, b); 21933 } 21934 21935 // CHECK-LABEL: define <1 x i64> @test_vsub_u64(<1 x i64> %a, <1 x i64> %b) #0 { 21936 // CHECK: [[SUB_I:%.*]] = sub <1 x i64> %a, %b 
21937 // CHECK: ret <1 x i64> [[SUB_I]] 21938 uint64x1_t test_vsub_u64(uint64x1_t a, uint64x1_t b) { 21939 return vsub_u64(a, b); 21940 } 21941 21942 // CHECK-LABEL: define <16 x i8> @test_vsubq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 21943 // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, %b 21944 // CHECK: ret <16 x i8> [[SUB_I]] 21945 int8x16_t test_vsubq_s8(int8x16_t a, int8x16_t b) { 21946 return vsubq_s8(a, b); 21947 } 21948 21949 // CHECK-LABEL: define <8 x i16> @test_vsubq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 21950 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, %b 21951 // CHECK: ret <8 x i16> [[SUB_I]] 21952 int16x8_t test_vsubq_s16(int16x8_t a, int16x8_t b) { 21953 return vsubq_s16(a, b); 21954 } 21955 21956 // CHECK-LABEL: define <4 x i32> @test_vsubq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 21957 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, %b 21958 // CHECK: ret <4 x i32> [[SUB_I]] 21959 int32x4_t test_vsubq_s32(int32x4_t a, int32x4_t b) { 21960 return vsubq_s32(a, b); 21961 } 21962 21963 // CHECK-LABEL: define <2 x i64> @test_vsubq_s64(<2 x i64> %a, <2 x i64> %b) #0 { 21964 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, %b 21965 // CHECK: ret <2 x i64> [[SUB_I]] 21966 int64x2_t test_vsubq_s64(int64x2_t a, int64x2_t b) { 21967 return vsubq_s64(a, b); 21968 } 21969 21970 // CHECK-LABEL: define <4 x float> @test_vsubq_f32(<4 x float> %a, <4 x float> %b) #0 { 21971 // CHECK: [[SUB_I:%.*]] = fsub <4 x float> %a, %b 21972 // CHECK: ret <4 x float> [[SUB_I]] 21973 float32x4_t test_vsubq_f32(float32x4_t a, float32x4_t b) { 21974 return vsubq_f32(a, b); 21975 } 21976 21977 // CHECK-LABEL: define <16 x i8> @test_vsubq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 21978 // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, %b 21979 // CHECK: ret <16 x i8> [[SUB_I]] 21980 uint8x16_t test_vsubq_u8(uint8x16_t a, uint8x16_t b) { 21981 return vsubq_u8(a, b); 21982 } 21983 21984 // CHECK-LABEL: define <8 x i16> @test_vsubq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 21985 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, %b 21986 // CHECK: ret <8 x i16> [[SUB_I]] 21987 uint16x8_t test_vsubq_u16(uint16x8_t a, uint16x8_t b) { 21988 return vsubq_u16(a, b); 21989 } 21990 21991 // CHECK-LABEL: define <4 x i32> @test_vsubq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 21992 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, %b 21993 // CHECK: ret <4 x i32> [[SUB_I]] 21994 uint32x4_t test_vsubq_u32(uint32x4_t a, uint32x4_t b) { 21995 return vsubq_u32(a, b); 21996 } 21997 21998 // CHECK-LABEL: define <2 x i64> @test_vsubq_u64(<2 x i64> %a, <2 x i64> %b) #0 { 21999 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, %b 22000 // CHECK: ret <2 x i64> [[SUB_I]] 22001 uint64x2_t test_vsubq_u64(uint64x2_t a, uint64x2_t b) { 22002 return vsubq_u64(a, b); 22003 } 22004 22005 22006 // CHECK-LABEL: define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) #0 { 22007 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 22008 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 22009 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 22010 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 22011 // CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]] 22012 // CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 22013 // CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8> 22014 // CHECK: ret <8 x i8> [[VSUBHN2_I]] 22015 int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) { 22016 return vsubhn_s16(a, b); 22017 } 22018 22019 // CHECK-LABEL: define <4 x i16> 
@test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) #0 { 22020 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 22021 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 22022 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 22023 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 22024 // CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]] 22025 // CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16> 22026 // CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16> 22027 // CHECK: ret <4 x i16> [[VSUBHN2_I]] 22028 int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) { 22029 return vsubhn_s32(a, b); 22030 } 22031 22032 // CHECK-LABEL: define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) #0 { 22033 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 22034 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 22035 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 22036 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 22037 // CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]] 22038 // CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32> 22039 // CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32> 22040 // CHECK: ret <2 x i32> [[VSUBHN2_I]] 22041 int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) { 22042 return vsubhn_s64(a, b); 22043 } 22044 22045 // CHECK-LABEL: define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) #0 { 22046 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 22047 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 22048 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 22049 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 22050 // CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]] 22051 // CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 22052 // CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8> 22053 // CHECK: ret <8 x i8> [[VSUBHN2_I]] 22054 uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) { 22055 return vsubhn_u16(a, b); 22056 } 22057 22058 // CHECK-LABEL: define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) #0 { 22059 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 22060 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 22061 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 22062 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 22063 // CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]] 22064 // CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16> 22065 // CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16> 22066 // CHECK: ret <4 x i16> [[VSUBHN2_I]] 22067 uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) { 22068 return vsubhn_u32(a, b); 22069 } 22070 22071 // CHECK-LABEL: define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) #0 { 22072 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 22073 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 22074 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 22075 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 22076 // CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]] 22077 // CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32> 22078 // CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> 
[[VSUBHN1_I]] to <2 x i32> 22079 // CHECK: ret <2 x i32> [[VSUBHN2_I]] 22080 uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) { 22081 return vsubhn_u64(a, b); 22082 } 22083 22084 22085 // CHECK-LABEL: define <8 x i16> @test_vsubl_s8(<8 x i8> %a, <8 x i8> %b) #0 { 22086 // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16> 22087 // CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16> 22088 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]] 22089 // CHECK: ret <8 x i16> [[SUB_I]] 22090 int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) { 22091 return vsubl_s8(a, b); 22092 } 22093 22094 // CHECK-LABEL: define <4 x i32> @test_vsubl_s16(<4 x i16> %a, <4 x i16> %b) #0 { 22095 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 22096 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 22097 // CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> 22098 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> 22099 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 22100 // CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32> 22101 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]] 22102 // CHECK: ret <4 x i32> [[SUB_I]] 22103 int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) { 22104 return vsubl_s16(a, b); 22105 } 22106 22107 // CHECK-LABEL: define <2 x i64> @test_vsubl_s32(<2 x i32> %a, <2 x i32> %b) #0 { 22108 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 22109 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 22110 // CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> 22111 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> 22112 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 22113 // CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64> 22114 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]] 22115 // CHECK: ret <2 x i64> [[SUB_I]] 22116 int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) { 22117 return vsubl_s32(a, b); 22118 } 22119 22120 // CHECK-LABEL: define <8 x i16> @test_vsubl_u8(<8 x i8> %a, <8 x i8> %b) #0 { 22121 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16> 22122 // CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16> 22123 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]] 22124 // CHECK: ret <8 x i16> [[SUB_I]] 22125 uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) { 22126 return vsubl_u8(a, b); 22127 } 22128 22129 // CHECK-LABEL: define <4 x i32> @test_vsubl_u16(<4 x i16> %a, <4 x i16> %b) #0 { 22130 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 22131 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 22132 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> 22133 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> 22134 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 22135 // CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> 22136 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]] 22137 // CHECK: ret <4 x i32> [[SUB_I]] 22138 uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) { 22139 return vsubl_u16(a, b); 22140 } 22141 22142 // CHECK-LABEL: define <2 x i64> @test_vsubl_u32(<2 x i32> %a, <2 x i32> %b) #0 { 22143 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 22144 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 22145 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> 22146 // CHECK: [[TMP2:%.*]] = 
bitcast <2 x i32> %b to <8 x i8> 22147 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 22148 // CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> 22149 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]] 22150 // CHECK: ret <2 x i64> [[SUB_I]] 22151 uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) { 22152 return vsubl_u32(a, b); 22153 } 22154 22155 22156 // CHECK-LABEL: define <8 x i16> @test_vsubw_s8(<8 x i16> %a, <8 x i8> %b) #0 { 22157 // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16> 22158 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]] 22159 // CHECK: ret <8 x i16> [[SUB_I]] 22160 int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) { 22161 return vsubw_s8(a, b); 22162 } 22163 22164 // CHECK-LABEL: define <4 x i32> @test_vsubw_s16(<4 x i32> %a, <4 x i16> %b) #0 { 22165 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 22166 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 22167 // CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> 22168 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]] 22169 // CHECK: ret <4 x i32> [[SUB_I]] 22170 int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) { 22171 return vsubw_s16(a, b); 22172 } 22173 22174 // CHECK-LABEL: define <2 x i64> @test_vsubw_s32(<2 x i64> %a, <2 x i32> %b) #0 { 22175 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 22176 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 22177 // CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> 22178 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]] 22179 // CHECK: ret <2 x i64> [[SUB_I]] 22180 int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) { 22181 return vsubw_s32(a, b); 22182 } 22183 22184 // CHECK-LABEL: define <8 x i16> @test_vsubw_u8(<8 x i16> %a, <8 x i8> %b) #0 { 22185 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16> 22186 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]] 22187 // CHECK: ret <8 x i16> [[SUB_I]] 22188 uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) { 22189 return vsubw_u8(a, b); 22190 } 22191 22192 // CHECK-LABEL: define <4 x i32> @test_vsubw_u16(<4 x i32> %a, <4 x i16> %b) #0 { 22193 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 22194 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 22195 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> 22196 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]] 22197 // CHECK: ret <4 x i32> [[SUB_I]] 22198 uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) { 22199 return vsubw_u16(a, b); 22200 } 22201 22202 // CHECK-LABEL: define <2 x i64> @test_vsubw_u32(<2 x i64> %a, <2 x i32> %b) #0 { 22203 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 22204 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 22205 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> 22206 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]] 22207 // CHECK: ret <2 x i64> [[SUB_I]] 22208 uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) { 22209 return vsubw_u32(a, b); 22210 } 22211 22212 22213 // CHECK-LABEL: define <8 x i8> @test_vtbl1_u8(<8 x i8> %a, <8 x i8> %b) #0 { 22214 // CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b) #4 22215 // CHECK: ret <8 x i8> [[VTBL1_I]] 22216 uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) { 22217 return vtbl1_u8(a, b); 22218 } 22219 22220 // CHECK-LABEL: define <8 x i8> @test_vtbl1_s8(<8 x i8> %a, <8 x i8> %b) #0 { 22221 
// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b) #4 22222 // CHECK: ret <8 x i8> [[VTBL1_I]] 22223 int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) { 22224 return vtbl1_s8(a, b); 22225 } 22226 22227 // CHECK-LABEL: define <8 x i8> @test_vtbl1_p8(<8 x i8> %a, <8 x i8> %b) #0 { 22228 // CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b) #4 22229 // CHECK: ret <8 x i8> [[VTBL1_I]] 22230 poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) { 22231 return vtbl1_p8(a, b); 22232 } 22233 22234 22235 // CHECK-LABEL: define <8 x i8> @test_vtbl2_u8([2 x i64] %a.coerce, <8 x i8> %b) #0 { 22236 // CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x8x2_t, align 8 22237 // CHECK: [[A:%.*]] = alloca %struct.uint8x8x2_t, align 8 22238 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[A]], i32 0, i32 0 22239 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]* 22240 // CHECK: store [2 x i64] [[A]].coerce, [2 x i64]* [[TMP0]], align 8 22241 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[A]], i32 0, i32 0 22242 // CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]* 22243 // CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8 22244 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0 22245 // CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]* 22246 // CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8 22247 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0 22248 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0 22249 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 22250 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0 22251 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1 22252 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 22253 // CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b) #4 22254 // CHECK: ret <8 x i8> [[VTBL2_I]] 22255 uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) { 22256 return vtbl2_u8(a, b); 22257 } 22258 22259 // CHECK-LABEL: define <8 x i8> @test_vtbl2_s8([2 x i64] %a.coerce, <8 x i8> %b) #0 { 22260 // CHECK: [[__P0_I:%.*]] = alloca %struct.int8x8x2_t, align 8 22261 // CHECK: [[A:%.*]] = alloca %struct.int8x8x2_t, align 8 22262 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[A]], i32 0, i32 0 22263 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]* 22264 // CHECK: store [2 x i64] [[A]].coerce, [2 x i64]* [[TMP0]], align 8 22265 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[A]], i32 0, i32 0 22266 // CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]* 22267 // CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8 22268 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0 22269 // CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]* 
22270 // CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8 22271 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0 22272 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0 22273 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 22274 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0 22275 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1 22276 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 22277 // CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b) #4 22278 // CHECK: ret <8 x i8> [[VTBL2_I]] 22279 int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) { 22280 return vtbl2_s8(a, b); 22281 } 22282 22283 // CHECK-LABEL: define <8 x i8> @test_vtbl2_p8([2 x i64] %a.coerce, <8 x i8> %b) #0 { 22284 // CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x8x2_t, align 8 22285 // CHECK: [[A:%.*]] = alloca %struct.poly8x8x2_t, align 8 22286 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[A]], i32 0, i32 0 22287 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]* 22288 // CHECK: store [2 x i64] [[A]].coerce, [2 x i64]* [[TMP0]], align 8 22289 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[A]], i32 0, i32 0 22290 // CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]* 22291 // CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8 22292 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0 22293 // CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]* 22294 // CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8 22295 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0 22296 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0 22297 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 22298 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0 22299 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1 22300 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 22301 // CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b) #4 22302 // CHECK: ret <8 x i8> [[VTBL2_I]] 22303 poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) { 22304 return vtbl2_p8(a, b); 22305 } 22306 22307 22308 // CHECK-LABEL: define <8 x i8> @test_vtbl3_u8([3 x i64] %a.coerce, <8 x i8> %b) #0 { 22309 // CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x8x3_t, align 8 22310 // CHECK: [[A:%.*]] = alloca %struct.uint8x8x3_t, align 8 22311 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[A]], i32 0, i32 0 22312 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]* 22313 // CHECK: store [3 x i64] [[A]].coerce, [3 x i64]* [[TMP0]], align 8 22314 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds 
%struct.uint8x8x3_t, %struct.uint8x8x3_t* [[A]], i32 0, i32 0 22315 // CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]* 22316 // CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8 22317 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0 22318 // CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]* 22319 // CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8 22320 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0 22321 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0 22322 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 22323 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0 22324 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1 22325 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 22326 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0 22327 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2 22328 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 22329 // CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b) #4 22330 // CHECK: ret <8 x i8> [[VTBL3_I]] 22331 uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) { 22332 return vtbl3_u8(a, b); 22333 } 22334 22335 // CHECK-LABEL: define <8 x i8> @test_vtbl3_s8([3 x i64] %a.coerce, <8 x i8> %b) #0 { 22336 // CHECK: [[__P0_I:%.*]] = alloca %struct.int8x8x3_t, align 8 22337 // CHECK: [[A:%.*]] = alloca %struct.int8x8x3_t, align 8 22338 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[A]], i32 0, i32 0 22339 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]* 22340 // CHECK: store [3 x i64] [[A]].coerce, [3 x i64]* [[TMP0]], align 8 22341 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[A]], i32 0, i32 0 22342 // CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]* 22343 // CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8 22344 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0 22345 // CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]* 22346 // CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8 22347 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0 22348 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0 22349 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 22350 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0 22351 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1 22352 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 22353 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, 
%struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0 22354 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2 22355 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 22356 // CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b) #4 22357 // CHECK: ret <8 x i8> [[VTBL3_I]] 22358 int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) { 22359 return vtbl3_s8(a, b); 22360 } 22361 22362 // CHECK-LABEL: define <8 x i8> @test_vtbl3_p8([3 x i64] %a.coerce, <8 x i8> %b) #0 { 22363 // CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x8x3_t, align 8 22364 // CHECK: [[A:%.*]] = alloca %struct.poly8x8x3_t, align 8 22365 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[A]], i32 0, i32 0 22366 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]* 22367 // CHECK: store [3 x i64] [[A]].coerce, [3 x i64]* [[TMP0]], align 8 22368 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[A]], i32 0, i32 0 22369 // CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]* 22370 // CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8 22371 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0 22372 // CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]* 22373 // CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8 22374 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0 22375 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0 22376 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 22377 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0 22378 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1 22379 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 22380 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0 22381 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2 22382 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 22383 // CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b) #4 22384 // CHECK: ret <8 x i8> [[VTBL3_I]] 22385 poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) { 22386 return vtbl3_p8(a, b); 22387 } 22388 22389 22390 // CHECK-LABEL: define <8 x i8> @test_vtbl4_u8([4 x i64] %a.coerce, <8 x i8> %b) #0 { 22391 // CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x8x4_t, align 8 22392 // CHECK: [[A:%.*]] = alloca %struct.uint8x8x4_t, align 8 22393 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[A]], i32 0, i32 0 22394 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]* 22395 // CHECK: store [4 x i64] [[A]].coerce, [4 x i64]* [[TMP0]], align 8 22396 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[A]], i32 0, i32 0 22397 // CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* 
[[COERCE_DIVE1]] to [4 x i64]* 22398 // CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8 22399 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0 22400 // CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]* 22401 // CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8 22402 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0 22403 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0 22404 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 22405 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0 22406 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1 22407 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 22408 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0 22409 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2 22410 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 22411 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0 22412 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3 22413 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8 22414 // CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b) #4 22415 // CHECK: ret <8 x i8> [[VTBL4_I]] 22416 uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) { 22417 return vtbl4_u8(a, b); 22418 } 22419 22420 // CHECK-LABEL: define <8 x i8> @test_vtbl4_s8([4 x i64] %a.coerce, <8 x i8> %b) #0 { 22421 // CHECK: [[__P0_I:%.*]] = alloca %struct.int8x8x4_t, align 8 22422 // CHECK: [[A:%.*]] = alloca %struct.int8x8x4_t, align 8 22423 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[A]], i32 0, i32 0 22424 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]* 22425 // CHECK: store [4 x i64] [[A]].coerce, [4 x i64]* [[TMP0]], align 8 22426 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[A]], i32 0, i32 0 22427 // CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]* 22428 // CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8 22429 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0 22430 // CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]* 22431 // CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8 22432 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0 22433 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0 22434 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 22435 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0 22436 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr 
inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1 22437 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 22438 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0 22439 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2 22440 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 22441 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0 22442 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3 22443 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8 22444 // CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b) #4 22445 // CHECK: ret <8 x i8> [[VTBL4_I]] 22446 int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) { 22447 return vtbl4_s8(a, b); 22448 } 22449 22450 // CHECK-LABEL: define <8 x i8> @test_vtbl4_p8([4 x i64] %a.coerce, <8 x i8> %b) #0 { 22451 // CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x8x4_t, align 8 22452 // CHECK: [[A:%.*]] = alloca %struct.poly8x8x4_t, align 8 22453 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[A]], i32 0, i32 0 22454 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]* 22455 // CHECK: store [4 x i64] [[A]].coerce, [4 x i64]* [[TMP0]], align 8 22456 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[A]], i32 0, i32 0 22457 // CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]* 22458 // CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8 22459 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0 22460 // CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]* 22461 // CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8 22462 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0 22463 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0 22464 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 22465 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0 22466 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1 22467 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 22468 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0 22469 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2 22470 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 22471 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0 22472 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3 22473 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8 22474 // CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> 
[[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b) #4 22475 // CHECK: ret <8 x i8> [[VTBL4_I]] 22476 poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) { 22477 return vtbl4_p8(a, b); 22478 } 22479 22480 22481 // CHECK-LABEL: define <8 x i8> @test_vtbx1_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 { 22482 // CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4 22483 // CHECK: ret <8 x i8> [[VTBX1_I]] 22484 uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) { 22485 return vtbx1_u8(a, b, c); 22486 } 22487 22488 // CHECK-LABEL: define <8 x i8> @test_vtbx1_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 { 22489 // CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4 22490 // CHECK: ret <8 x i8> [[VTBX1_I]] 22491 int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c) { 22492 return vtbx1_s8(a, b, c); 22493 } 22494 22495 // CHECK-LABEL: define <8 x i8> @test_vtbx1_p8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 { 22496 // CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4 22497 // CHECK: ret <8 x i8> [[VTBX1_I]] 22498 poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) { 22499 return vtbx1_p8(a, b, c); 22500 } 22501 22502 22503 // CHECK-LABEL: define <8 x i8> @test_vtbx2_u8(<8 x i8> %a, [2 x i64] %b.coerce, <8 x i8> %c) #0 { 22504 // CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x8x2_t, align 8 22505 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8 22506 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0 22507 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]* 22508 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 22509 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0 22510 // CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]* 22511 // CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8 22512 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0 22513 // CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]* 22514 // CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8 22515 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0 22516 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0 22517 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 22518 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0 22519 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1 22520 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 22521 // CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c) #4 22522 // CHECK: ret <8 x i8> [[VTBX2_I]] 22523 uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) { 22524 return vtbx2_u8(a, b, c); 22525 } 22526 22527 // CHECK-LABEL: define <8 x i8> @test_vtbx2_s8(<8 x i8> %a, [2 x i64] %b.coerce, <8 x i8> %c) #0 { 22528 // CHECK: [[__P1_I:%.*]] = alloca %struct.int8x8x2_t, align 8 22529 // CHECK: [[B:%.*]] = alloca 
%struct.int8x8x2_t, align 8 22530 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0 22531 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]* 22532 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 22533 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0 22534 // CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]* 22535 // CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8 22536 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0 22537 // CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]* 22538 // CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8 22539 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0 22540 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0 22541 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 22542 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0 22543 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1 22544 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 22545 // CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c) #4 22546 // CHECK: ret <8 x i8> [[VTBX2_I]] 22547 int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) { 22548 return vtbx2_s8(a, b, c); 22549 } 22550 22551 // CHECK-LABEL: define <8 x i8> @test_vtbx2_p8(<8 x i8> %a, [2 x i64] %b.coerce, <8 x i8> %c) #0 { 22552 // CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x8x2_t, align 8 22553 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8 22554 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0 22555 // CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]* 22556 // CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8 22557 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0 22558 // CHECK: [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]* 22559 // CHECK: [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8 22560 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0 22561 // CHECK: [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]* 22562 // CHECK: store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8 22563 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0 22564 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0 22565 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 22566 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0 22567 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1 22568 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 22569 // 
CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c) #4 22570 // CHECK: ret <8 x i8> [[VTBX2_I]] 22571 poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) { 22572 return vtbx2_p8(a, b, c); 22573 } 22574 22575 22576 // CHECK-LABEL: define <8 x i8> @test_vtbx3_u8(<8 x i8> %a, [3 x i64] %b.coerce, <8 x i8> %c) #0 { 22577 // CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x8x3_t, align 8 22578 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8 22579 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0 22580 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]* 22581 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 22582 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0 22583 // CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]* 22584 // CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8 22585 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0 22586 // CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]* 22587 // CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8 22588 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0 22589 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0 22590 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 22591 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0 22592 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1 22593 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 22594 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0 22595 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2 22596 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 22597 // CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c) #4 22598 // CHECK: ret <8 x i8> [[VTBX3_I]] 22599 uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) { 22600 return vtbx3_u8(a, b, c); 22601 } 22602 22603 // CHECK-LABEL: define <8 x i8> @test_vtbx3_s8(<8 x i8> %a, [3 x i64] %b.coerce, <8 x i8> %c) #0 { 22604 // CHECK: [[__P1_I:%.*]] = alloca %struct.int8x8x3_t, align 8 22605 // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8 22606 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0 22607 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]* 22608 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 22609 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0 22610 // CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]* 22611 // CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8 22612 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, 
%struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0 22613 // CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]* 22614 // CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8 22615 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0 22616 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0 22617 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 22618 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0 22619 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1 22620 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 22621 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0 22622 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2 22623 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 22624 // CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c) #4 22625 // CHECK: ret <8 x i8> [[VTBX3_I]] 22626 int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) { 22627 return vtbx3_s8(a, b, c); 22628 } 22629 22630 // CHECK-LABEL: define <8 x i8> @test_vtbx3_p8(<8 x i8> %a, [3 x i64] %b.coerce, <8 x i8> %c) #0 { 22631 // CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x8x3_t, align 8 22632 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8 22633 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0 22634 // CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]* 22635 // CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8 22636 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0 22637 // CHECK: [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]* 22638 // CHECK: [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8 22639 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0 22640 // CHECK: [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]* 22641 // CHECK: store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8 22642 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0 22643 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0 22644 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 22645 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0 22646 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1 22647 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 22648 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0 22649 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2 22650 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 22651 // CHECK: 
[[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c) #4 22652 // CHECK: ret <8 x i8> [[VTBX3_I]] 22653 poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) { 22654 return vtbx3_p8(a, b, c); 22655 } 22656 22657 22658 // CHECK-LABEL: define <8 x i8> @test_vtbx4_u8(<8 x i8> %a, [4 x i64] %b.coerce, <8 x i8> %c) #0 { 22659 // CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x8x4_t, align 8 22660 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8 22661 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0 22662 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]* 22663 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 22664 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0 22665 // CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]* 22666 // CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8 22667 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0 22668 // CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]* 22669 // CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8 22670 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0 22671 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0 22672 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 22673 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0 22674 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1 22675 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 22676 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0 22677 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2 22678 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 22679 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0 22680 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3 22681 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8 22682 // CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c) #4 22683 // CHECK: ret <8 x i8> [[VTBX4_I]] 22684 uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) { 22685 return vtbx4_u8(a, b, c); 22686 } 22687 22688 // CHECK-LABEL: define <8 x i8> @test_vtbx4_s8(<8 x i8> %a, [4 x i64] %b.coerce, <8 x i8> %c) #0 { 22689 // CHECK: [[__P1_I:%.*]] = alloca %struct.int8x8x4_t, align 8 22690 // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8 22691 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0 22692 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]* 22693 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 22694 // CHECK: 
[[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0 22695 // CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]* 22696 // CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8 22697 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0 22698 // CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]* 22699 // CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8 22700 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0 22701 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0 22702 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 22703 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0 22704 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1 22705 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 22706 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0 22707 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2 22708 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 22709 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0 22710 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3 22711 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8 22712 // CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c) #4 22713 // CHECK: ret <8 x i8> [[VTBX4_I]] 22714 int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) { 22715 return vtbx4_s8(a, b, c); 22716 } 22717 22718 // CHECK-LABEL: define <8 x i8> @test_vtbx4_p8(<8 x i8> %a, [4 x i64] %b.coerce, <8 x i8> %c) #0 { 22719 // CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x8x4_t, align 8 22720 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8 22721 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0 22722 // CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]* 22723 // CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8 22724 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0 22725 // CHECK: [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]* 22726 // CHECK: [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8 22727 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0 22728 // CHECK: [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]* 22729 // CHECK: store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8 22730 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0 22731 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0 22732 // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* 
[[ARRAYIDX_I]], align 8 22733 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0 22734 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1 22735 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 22736 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0 22737 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2 22738 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 22739 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0 22740 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3 22741 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8 22742 // CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c) #4 22743 // CHECK: ret <8 x i8> [[VTBX4_I]] 22744 poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) { 22745 return vtbx4_p8(a, b, c); 22746 } 22747 22748 22749 // CHECK-LABEL: define void @test_vtrn_s8(%struct.int8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 { 22750 // CHECK: [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8 22751 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* 22752 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* 22753 // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 22754 // CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]] 22755 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 22756 // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> 22757 // CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]] 22758 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8* 22759 // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* 22760 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4 22761 // CHECK: ret void 22762 int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) { 22763 return vtrn_s8(a, b); 22764 } 22765 22766 // CHECK-LABEL: define void @test_vtrn_s16(%struct.int16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 { 22767 // CHECK: [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8 22768 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* 22769 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> 22770 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> 22771 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* 22772 // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 22773 // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 22774 // CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> 22775 // CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]] 22776 // CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 22777 // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 
1, i32 5, i32 3, i32 7> 22778 // CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP6]] 22779 // CHECK: [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8* 22780 // CHECK: [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* 22781 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 22782 // CHECK: ret void 22783 int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) { 22784 return vtrn_s16(a, b); 22785 } 22786 22787 // CHECK-LABEL: define void @test_vtrn_s32(%struct.int32x2x2_t* noalias sret %agg.result, <2 x i32> %a, <2 x i32> %b) #0 { 22788 // CHECK: [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8 22789 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* 22790 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8> 22791 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> 22792 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* 22793 // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 22794 // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 22795 // CHECK: [[VTRN_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2> 22796 // CHECK: store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]] 22797 // CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 22798 // CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3> 22799 // CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP6]] 22800 // CHECK: [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8* 22801 // CHECK: [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* 22802 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 22803 // CHECK: ret void 22804 int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) { 22805 return vtrn_s32(a, b); 22806 } 22807 22808 // CHECK-LABEL: define void @test_vtrn_u8(%struct.uint8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 { 22809 // CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8 22810 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* 22811 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* 22812 // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 22813 // CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]] 22814 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 22815 // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> 22816 // CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]] 22817 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8* 22818 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* 22819 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4 22820 // CHECK: ret void 22821 uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) { 22822 return vtrn_u8(a, b); 22823 } 22824 22825 // CHECK-LABEL: define void @test_vtrn_u16(%struct.uint16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 { 22826 // CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8 22827 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* 22828 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> 22829 // CHECK: [[TMP2:%.*]] = 
bitcast <4 x i16> %b to <8 x i8> 22830 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* 22831 // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 22832 // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 22833 // CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> 22834 // CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]] 22835 // CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 22836 // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> 22837 // CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP6]] 22838 // CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8* 22839 // CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* 22840 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 22841 // CHECK: ret void 22842 uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) { 22843 return vtrn_u16(a, b); 22844 } 22845 22846 // CHECK-LABEL: define void @test_vtrn_u32(%struct.uint32x2x2_t* noalias sret %agg.result, <2 x i32> %a, <2 x i32> %b) #0 { 22847 // CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8 22848 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* 22849 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8> 22850 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> 22851 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* 22852 // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 22853 // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 22854 // CHECK: [[VTRN_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2> 22855 // CHECK: store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]] 22856 // CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 22857 // CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3> 22858 // CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP6]] 22859 // CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8* 22860 // CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* 22861 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 22862 // CHECK: ret void 22863 uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) { 22864 return vtrn_u32(a, b); 22865 } 22866 22867 // CHECK-LABEL: define void @test_vtrn_f32(%struct.float32x2x2_t* noalias sret %agg.result, <2 x float> %a, <2 x float> %b) #0 { 22868 // CHECK: [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8 22869 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* 22870 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8> 22871 // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8> 22872 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>* 22873 // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 22874 // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> 22875 // CHECK: [[VTRN_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2> 22876 // CHECK: store <2 x float> [[VTRN_I]], <2 x float>* [[TMP3]] 22877 // CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1 22878 // CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x float> 
[[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 3> 22879 // CHECK: store <2 x float> [[VTRN1_I]], <2 x float>* [[TMP6]] 22880 // CHECK: [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8* 22881 // CHECK: [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* 22882 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 22883 // CHECK: ret void 22884 float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) { 22885 return vtrn_f32(a, b); 22886 } 22887 22888 // CHECK-LABEL: define void @test_vtrn_p8(%struct.poly8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 { 22889 // CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8 22890 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* 22891 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* 22892 // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 22893 // CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]] 22894 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 22895 // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> 22896 // CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]] 22897 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8* 22898 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* 22899 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4 22900 // CHECK: ret void 22901 poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) { 22902 return vtrn_p8(a, b); 22903 } 22904 22905 // CHECK-LABEL: define void @test_vtrn_p16(%struct.poly16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 { 22906 // CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8 22907 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* 22908 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> 22909 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> 22910 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* 22911 // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 22912 // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 22913 // CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> 22914 // CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]] 22915 // CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 22916 // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> 22917 // CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP6]] 22918 // CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8* 22919 // CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* 22920 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 22921 // CHECK: ret void 22922 poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) { 22923 return vtrn_p16(a, b); 22924 } 22925 22926 // CHECK-LABEL: define void @test_vtrnq_s8(%struct.int8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 { 22927 // CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16 22928 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* 22929 // CHECK: 
[[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* 22930 // CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> 22931 // CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]] 22932 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 22933 // CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> 22934 // CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]] 22935 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8* 22936 // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* 22937 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4 22938 // CHECK: ret void 22939 int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) { 22940 return vtrnq_s8(a, b); 22941 } 22942 22943 // CHECK-LABEL: define void @test_vtrnq_s16(%struct.int16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 { 22944 // CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16 22945 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* 22946 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> 22947 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> 22948 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* 22949 // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 22950 // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> 22951 // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 22952 // CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]] 22953 // CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 22954 // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> 22955 // CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP6]] 22956 // CHECK: [[TMP7:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8* 22957 // CHECK: [[TMP8:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* 22958 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 22959 // CHECK: ret void 22960 int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) { 22961 return vtrnq_s16(a, b); 22962 } 22963 22964 // CHECK-LABEL: define void @test_vtrnq_s32(%struct.int32x4x2_t* noalias sret %agg.result, <4 x i32> %a, <4 x i32> %b) #0 { 22965 // CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16 22966 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* 22967 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> 22968 // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> 22969 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* 22970 // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 22971 // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> 22972 // CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> 22973 // CHECK: store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]] 22974 // CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 22975 // 
CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> 22976 // CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP6]] 22977 // CHECK: [[TMP7:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8* 22978 // CHECK: [[TMP8:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* 22979 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 22980 // CHECK: ret void 22981 int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) { 22982 return vtrnq_s32(a, b); 22983 } 22984 22985 // CHECK-LABEL: define void @test_vtrnq_u8(%struct.uint8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 { 22986 // CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16 22987 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* 22988 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* 22989 // CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> 22990 // CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]] 22991 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 22992 // CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> 22993 // CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]] 22994 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8* 22995 // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* 22996 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4 22997 // CHECK: ret void 22998 uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) { 22999 return vtrnq_u8(a, b); 23000 } 23001 23002 // CHECK-LABEL: define void @test_vtrnq_u16(%struct.uint16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 { 23003 // CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16 23004 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* 23005 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> 23006 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> 23007 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* 23008 // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 23009 // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> 23010 // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 23011 // CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]] 23012 // CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 23013 // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> 23014 // CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP6]] 23015 // CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8* 23016 // CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* 23017 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 23018 // CHECK: ret void 23019 uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) { 23020 return vtrnq_u16(a, b); 23021 } 23022 23023 // CHECK-LABEL: 
define void @test_vtrnq_u32(%struct.uint32x4x2_t* noalias sret %agg.result, <4 x i32> %a, <4 x i32> %b) #0 { 23024 // CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16 23025 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* 23026 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> 23027 // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> 23028 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* 23029 // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 23030 // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> 23031 // CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> 23032 // CHECK: store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]] 23033 // CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 23034 // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> 23035 // CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP6]] 23036 // CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8* 23037 // CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* 23038 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 23039 // CHECK: ret void 23040 uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) { 23041 return vtrnq_u32(a, b); 23042 } 23043 23044 // CHECK-LABEL: define void @test_vtrnq_f32(%struct.float32x4x2_t* noalias sret %agg.result, <4 x float> %a, <4 x float> %b) #0 { 23045 // CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16 23046 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* 23047 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8> 23048 // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8> 23049 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>* 23050 // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 23051 // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> 23052 // CHECK: [[VTRN_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> 23053 // CHECK: store <4 x float> [[VTRN_I]], <4 x float>* [[TMP3]] 23054 // CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1 23055 // CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> 23056 // CHECK: store <4 x float> [[VTRN1_I]], <4 x float>* [[TMP6]] 23057 // CHECK: [[TMP7:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8* 23058 // CHECK: [[TMP8:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* 23059 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 23060 // CHECK: ret void 23061 float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) { 23062 return vtrnq_f32(a, b); 23063 } 23064 23065 // CHECK-LABEL: define void @test_vtrnq_p8(%struct.poly8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 { 23066 // CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16 23067 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* 23068 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* 23069 // CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 
28, i32 14, i32 30> 23070 // CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]] 23071 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 23072 // CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> 23073 // CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]] 23074 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8* 23075 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* 23076 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4 23077 // CHECK: ret void 23078 poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) { 23079 return vtrnq_p8(a, b); 23080 } 23081 23082 // CHECK-LABEL: define void @test_vtrnq_p16(%struct.poly16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 { 23083 // CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16 23084 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* 23085 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> 23086 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> 23087 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* 23088 // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 23089 // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> 23090 // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 23091 // CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]] 23092 // CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 23093 // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> 23094 // CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP6]] 23095 // CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8* 23096 // CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* 23097 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 23098 // CHECK: ret void 23099 poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) { 23100 return vtrnq_p16(a, b); 23101 } 23102 23103 23104 // CHECK-LABEL: define <8 x i8> @test_vtst_s8(<8 x i8> %a, <8 x i8> %b) #0 { 23105 // CHECK: [[TMP0:%.*]] = and <8 x i8> %a, %b 23106 // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer 23107 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8> 23108 // CHECK: ret <8 x i8> [[VTST_I]] 23109 uint8x8_t test_vtst_s8(int8x8_t a, int8x8_t b) { 23110 return vtst_s8(a, b); 23111 } 23112 23113 // CHECK-LABEL: define <4 x i16> @test_vtst_s16(<4 x i16> %a, <4 x i16> %b) #0 { 23114 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 23115 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 23116 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 23117 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 23118 // CHECK: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]] 23119 // CHECK: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer 23120 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16> 23121 // CHECK: ret <4 x i16> [[VTST_I]] 23122 uint16x4_t test_vtst_s16(int16x4_t a, int16x4_t b) { 23123 return vtst_s16(a, b); 23124 } 23125 23126 // 
CHECK-LABEL: define <2 x i32> @test_vtst_s32(<2 x i32> %a, <2 x i32> %b) #0 { 23127 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 23128 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 23129 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 23130 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 23131 // CHECK: [[TMP4:%.*]] = and <2 x i32> [[TMP2]], [[TMP3]] 23132 // CHECK: [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer 23133 // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i32> 23134 // CHECK: ret <2 x i32> [[VTST_I]] 23135 uint32x2_t test_vtst_s32(int32x2_t a, int32x2_t b) { 23136 return vtst_s32(a, b); 23137 } 23138 23139 // CHECK-LABEL: define <8 x i8> @test_vtst_u8(<8 x i8> %a, <8 x i8> %b) #0 { 23140 // CHECK: [[TMP0:%.*]] = and <8 x i8> %a, %b 23141 // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer 23142 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8> 23143 // CHECK: ret <8 x i8> [[VTST_I]] 23144 uint8x8_t test_vtst_u8(uint8x8_t a, uint8x8_t b) { 23145 return vtst_u8(a, b); 23146 } 23147 23148 // CHECK-LABEL: define <4 x i16> @test_vtst_u16(<4 x i16> %a, <4 x i16> %b) #0 { 23149 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 23150 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 23151 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 23152 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 23153 // CHECK: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]] 23154 // CHECK: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer 23155 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16> 23156 // CHECK: ret <4 x i16> [[VTST_I]] 23157 uint16x4_t test_vtst_u16(uint16x4_t a, uint16x4_t b) { 23158 return vtst_u16(a, b); 23159 } 23160 23161 // CHECK-LABEL: define <2 x i32> @test_vtst_u32(<2 x i32> %a, <2 x i32> %b) #0 { 23162 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 23163 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 23164 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 23165 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 23166 // CHECK: [[TMP4:%.*]] = and <2 x i32> [[TMP2]], [[TMP3]] 23167 // CHECK: [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer 23168 // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i32> 23169 // CHECK: ret <2 x i32> [[VTST_I]] 23170 uint32x2_t test_vtst_u32(uint32x2_t a, uint32x2_t b) { 23171 return vtst_u32(a, b); 23172 } 23173 23174 // CHECK-LABEL: define <8 x i8> @test_vtst_p8(<8 x i8> %a, <8 x i8> %b) #0 { 23175 // CHECK: [[TMP0:%.*]] = and <8 x i8> %a, %b 23176 // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer 23177 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8> 23178 // CHECK: ret <8 x i8> [[VTST_I]] 23179 uint8x8_t test_vtst_p8(poly8x8_t a, poly8x8_t b) { 23180 return vtst_p8(a, b); 23181 } 23182 23183 // CHECK-LABEL: define <4 x i16> @test_vtst_p16(<4 x i16> %a, <4 x i16> %b) #0 { 23184 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 23185 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 23186 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 23187 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 23188 // CHECK: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]] 23189 // CHECK: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer 23190 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16> 23191 // CHECK: ret <4 x i16> [[VTST_I]] 23192 
uint16x4_t test_vtst_p16(poly16x4_t a, poly16x4_t b) { 23193 return vtst_p16(a, b); 23194 } 23195 23196 // CHECK-LABEL: define <16 x i8> @test_vtstq_s8(<16 x i8> %a, <16 x i8> %b) #0 { 23197 // CHECK: [[TMP0:%.*]] = and <16 x i8> %a, %b 23198 // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer 23199 // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8> 23200 // CHECK: ret <16 x i8> [[VTST_I]] 23201 uint8x16_t test_vtstq_s8(int8x16_t a, int8x16_t b) { 23202 return vtstq_s8(a, b); 23203 } 23204 23205 // CHECK-LABEL: define <8 x i16> @test_vtstq_s16(<8 x i16> %a, <8 x i16> %b) #0 { 23206 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 23207 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 23208 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 23209 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 23210 // CHECK: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]] 23211 // CHECK: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer 23212 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16> 23213 // CHECK: ret <8 x i16> [[VTST_I]] 23214 uint16x8_t test_vtstq_s16(int16x8_t a, int16x8_t b) { 23215 return vtstq_s16(a, b); 23216 } 23217 23218 // CHECK-LABEL: define <4 x i32> @test_vtstq_s32(<4 x i32> %a, <4 x i32> %b) #0 { 23219 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 23220 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 23221 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 23222 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 23223 // CHECK: [[TMP4:%.*]] = and <4 x i32> [[TMP2]], [[TMP3]] 23224 // CHECK: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer 23225 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32> 23226 // CHECK: ret <4 x i32> [[VTST_I]] 23227 uint32x4_t test_vtstq_s32(int32x4_t a, int32x4_t b) { 23228 return vtstq_s32(a, b); 23229 } 23230 23231 // CHECK-LABEL: define <16 x i8> @test_vtstq_u8(<16 x i8> %a, <16 x i8> %b) #0 { 23232 // CHECK: [[TMP0:%.*]] = and <16 x i8> %a, %b 23233 // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer 23234 // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8> 23235 // CHECK: ret <16 x i8> [[VTST_I]] 23236 uint8x16_t test_vtstq_u8(uint8x16_t a, uint8x16_t b) { 23237 return vtstq_u8(a, b); 23238 } 23239 23240 // CHECK-LABEL: define <8 x i16> @test_vtstq_u16(<8 x i16> %a, <8 x i16> %b) #0 { 23241 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 23242 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 23243 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 23244 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 23245 // CHECK: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]] 23246 // CHECK: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer 23247 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16> 23248 // CHECK: ret <8 x i16> [[VTST_I]] 23249 uint16x8_t test_vtstq_u16(uint16x8_t a, uint16x8_t b) { 23250 return vtstq_u16(a, b); 23251 } 23252 23253 // CHECK-LABEL: define <4 x i32> @test_vtstq_u32(<4 x i32> %a, <4 x i32> %b) #0 { 23254 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 23255 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 23256 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 23257 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 23258 // CHECK: [[TMP4:%.*]] = and <4 x i32> [[TMP2]], [[TMP3]] 23259 // CHECK: [[TMP5:%.*]] = icmp 
ne <4 x i32> [[TMP4]], zeroinitializer 23260 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32> 23261 // CHECK: ret <4 x i32> [[VTST_I]] 23262 uint32x4_t test_vtstq_u32(uint32x4_t a, uint32x4_t b) { 23263 return vtstq_u32(a, b); 23264 } 23265 23266 // CHECK-LABEL: define <16 x i8> @test_vtstq_p8(<16 x i8> %a, <16 x i8> %b) #0 { 23267 // CHECK: [[TMP0:%.*]] = and <16 x i8> %a, %b 23268 // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer 23269 // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8> 23270 // CHECK: ret <16 x i8> [[VTST_I]] 23271 uint8x16_t test_vtstq_p8(poly8x16_t a, poly8x16_t b) { 23272 return vtstq_p8(a, b); 23273 } 23274 23275 // CHECK-LABEL: define <8 x i16> @test_vtstq_p16(<8 x i16> %a, <8 x i16> %b) #0 { 23276 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 23277 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 23278 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 23279 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 23280 // CHECK: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]] 23281 // CHECK: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer 23282 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16> 23283 // CHECK: ret <8 x i16> [[VTST_I]] 23284 uint16x8_t test_vtstq_p16(poly16x8_t a, poly16x8_t b) { 23285 return vtstq_p16(a, b); 23286 } 23287 23288 23289 // CHECK-LABEL: define void @test_vuzp_s8(%struct.int8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 { 23290 // CHECK: [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8 23291 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* 23292 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* 23293 // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 23294 // CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]] 23295 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 23296 // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 23297 // CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]] 23298 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8* 23299 // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* 23300 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4 23301 // CHECK: ret void 23302 int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) { 23303 return vuzp_s8(a, b); 23304 } 23305 23306 // CHECK-LABEL: define void @test_vuzp_s16(%struct.int16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 { 23307 // CHECK: [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8 23308 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* 23309 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> 23310 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> 23311 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* 23312 // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 23313 // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> 23314 // CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6> 23315 // CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]] 23316 // CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 23317 // CHECK: 
// CHECK-LABEL: define void @test_vuzp_s8(%struct.int8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) {
  return vuzp_s8(a, b);
}

// CHECK-LABEL: define void @test_vuzp_s16(%struct.int16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) {
  return vuzp_s16(a, b);
}

// CHECK-LABEL: define void @test_vuzp_s32(%struct.int32x2x2_t* noalias sret %agg.result, <2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
// CHECK: store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
// CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) {
  return vuzp_s32(a, b);
}

// CHECK-LABEL: define void @test_vuzp_u8(%struct.uint8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) {
  return vuzp_u8(a, b);
}

// CHECK-LABEL: define void @test_vuzp_u16(%struct.uint16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) {
  return vuzp_u16(a, b);
}

// CHECK-LABEL: define void @test_vuzp_u32(%struct.uint32x2x2_t* noalias sret %agg.result, <2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
// CHECK: store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
// CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) {
  return vuzp_u32(a, b);
}

// CHECK-LABEL: define void @test_vuzp_f32(%struct.float32x2x2_t* noalias sret %agg.result, <2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
// CHECK: store <2 x float> [[VUZP_I]], <2 x float>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 3>
// CHECK: store <2 x float> [[VUZP1_I]], <2 x float>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) {
  return vuzp_f32(a, b);
}

// CHECK-LABEL: define void @test_vuzp_p8(%struct.poly8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) {
  return vuzp_p8(a, b);
}

// CHECK-LABEL: define void @test_vuzp_p16(%struct.poly16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) {
  return vuzp_p16(a, b);
}

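// The q-register variants repeat the same de-interleave on 128-bit vectors;
// the temporary struct grows to 32 bytes with 16-byte alignment, as the
// memcpy size and alignment arguments below reflect.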
// CHECK-LABEL: define void @test_vuzpq_s8(%struct.int8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
// CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
// CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) {
  return vuzpq_s8(a, b);
}

// CHECK-LABEL: define void @test_vuzpq_s16(%struct.int16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) {
  return vuzpq_s16(a, b);
}

// CHECK-LABEL: define void @test_vuzpq_s32(%struct.int32x4x2_t* noalias sret %agg.result, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) {
  return vuzpq_s32(a, b);
}

// CHECK-LABEL: define void @test_vuzpq_u8(%struct.uint8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
// CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
// CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) {
  return vuzpq_u8(a, b);
}

// CHECK-LABEL: define void @test_vuzpq_u16(%struct.uint16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) {
  return vuzpq_u16(a, b);
}

// CHECK-LABEL: define void @test_vuzpq_u32(%struct.uint32x4x2_t* noalias sret %agg.result, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) {
  return vuzpq_u32(a, b);
}

// CHECK-LABEL: define void @test_vuzpq_f32(%struct.float32x4x2_t* noalias sret %agg.result, <4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: store <4 x float> [[VUZP_I]], <4 x float>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: store <4 x float> [[VUZP1_I]], <4 x float>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) {
  return vuzpq_f32(a, b);
}

// CHECK-LABEL: define void @test_vuzpq_p8(%struct.poly8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
// CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
// CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) {
  return vuzpq_p8(a, b);
}

// CHECK-LABEL: define void @test_vuzpq_p16(%struct.poly16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) {
  return vuzpq_p16(a, b);
}

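// vzip interleaves its operands lane by lane: the first result zips the low
// halves of a and b (mask <0, n, 1, n+1, ...> for n lanes per input), the
// second zips the high halves. E.g. vzip_s8({0..7}, {8..15}) yields
// {0,8,1,9,2,10,3,11} and {4,12,5,13,6,14,7,15}; for 2-lane vectors the zip
// and uzp masks coincide.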
// CHECK-LABEL: define void @test_vzip_s8(%struct.int8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) {
  return vzip_s8(a, b);
}

// CHECK-LABEL: define void @test_vzip_s16(%struct.int16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) {
  return vzip_s16(a, b);
}

// CHECK-LABEL: define void @test_vzip_s32(%struct.int32x2x2_t* noalias sret %agg.result, <2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VZIP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
// CHECK: store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
// CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) {
  return vzip_s32(a, b);
}

// CHECK-LABEL: define void @test_vzip_u8(%struct.uint8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) {
  return vzip_u8(a, b);
}

// CHECK-LABEL: define void @test_vzip_u16(%struct.uint16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) {
  return vzip_u16(a, b);
}

// CHECK-LABEL: define void @test_vzip_u32(%struct.uint32x2x2_t* noalias sret %agg.result, <2 x i32> %a, <2 x i32> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VZIP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
// CHECK: store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
// CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) {
  return vzip_u32(a, b);
}

// CHECK-LABEL: define void @test_vzip_f32(%struct.float32x2x2_t* noalias sret %agg.result, <2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
// CHECK: [[VZIP_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
// CHECK: store <2 x float> [[VZIP_I]], <2 x float>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 3>
// CHECK: store <2 x float> [[VZIP1_I]], <2 x float>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) {
  return vzip_f32(a, b);
}

// CHECK-LABEL: define void @test_vzip_p8(%struct.poly8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) {
  return vzip_p8(a, b);
}

// CHECK-LABEL: define void @test_vzip_p16(%struct.poly16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4
// CHECK: ret void
poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) {
  return vzip_p16(a, b);
}

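// vzipq: the same interleave over 128-bit vectors, again returned through
// sret as a 32-byte two-vector struct.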
// CHECK-LABEL: define void @test_vzipq_s8(%struct.int8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
// CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
// CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) {
  return vzipq_s8(a, b);
}

// CHECK-LABEL: define void @test_vzipq_s16(%struct.int16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) {
  return vzipq_s16(a, b);
}

// CHECK-LABEL: define void @test_vzipq_s32(%struct.int32x4x2_t* noalias sret %agg.result, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK: store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) {
  return vzipq_s32(a, b);
}

// CHECK-LABEL: define void @test_vzipq_u8(%struct.uint8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
// CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
// CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) {
  return vzipq_u8(a, b);
}

// CHECK-LABEL: define void @test_vzipq_u16(%struct.uint16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) {
  return vzipq_u16(a, b);
}

// CHECK-LABEL: define void @test_vzipq_u32(%struct.uint32x4x2_t* noalias sret %agg.result, <4 x i32> %a, <4 x i32> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK: store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) {
  return vzipq_u32(a, b);
}

// CHECK-LABEL: define void @test_vzipq_f32(%struct.float32x4x2_t* noalias sret %agg.result, <4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK: store <4 x float> [[VZIP_I]], <4 x float>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK: store <4 x float> [[VZIP1_I]], <4 x float>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) {
  return vzipq_f32(a, b);
}

// CHECK-LABEL: define void @test_vzipq_p8(%struct.poly8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
// CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]]
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
// CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]]
// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8*
// CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) {
  return vzipq_p8(a, b);
}

// CHECK-LABEL: define void @test_vzipq_p16(%struct.poly16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 {
// CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]]
// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP6]]
// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8*
// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4
// CHECK: ret void
poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) {
  return vzipq_p16(a, b);
}