; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s

; Every test_vext* / test_vextq* function below has the same shape:
;   1. allocate a vector slot plus two operand slots,
;   2. copy the (never initialized) vector slot into both operand slots,
;   3. reload the operands (through a round-trip bitcast to bytes for the
;      non-i8 element types),
;   4. shufflevector them with a run of consecutive indices,
;   5. store the result back to the vector slot.
; Only the element type, the shuffle mask and the expected instruction differ.

; <8 x i8>, mask 1..8; expects an ext with immediate #1.
define void @test_vext_s8() nounwind ssp {
  ; CHECK-LABEL: test_vext_s8:
  ; CHECK: {{ext.8.*#1}}
  %v.addr = alloca <8 x i8>, align 8
  %a.addr = alloca <8 x i8>, align 8
  %b.addr = alloca <8 x i8>, align 8
  %v.first = load <8 x i8>, <8 x i8>* %v.addr, align 8
  store <8 x i8> %v.first, <8 x i8>* %a.addr, align 8
  %v.second = load <8 x i8>, <8 x i8>* %v.addr, align 8
  store <8 x i8> %v.second, <8 x i8>* %b.addr, align 8
  %a = load <8 x i8>, <8 x i8>* %a.addr, align 8
  %b = load <8 x i8>, <8 x i8>* %b.addr, align 8
  %ext = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  store <8 x i8> %ext, <8 x i8>* %v.addr, align 8
  ret void
}

; <8 x i8>, mask 2..9; expects an ext with immediate #2.
define void @test_vext_u8() nounwind ssp {
  ; CHECK-LABEL: test_vext_u8:
  ; CHECK: {{ext.8.*#2}}
  %v.addr = alloca <8 x i8>, align 8
  %a.addr = alloca <8 x i8>, align 8
  %b.addr = alloca <8 x i8>, align 8
  %v.first = load <8 x i8>, <8 x i8>* %v.addr, align 8
  store <8 x i8> %v.first, <8 x i8>* %a.addr, align 8
  %v.second = load <8 x i8>, <8 x i8>* %v.addr, align 8
  store <8 x i8> %v.second, <8 x i8>* %b.addr, align 8
  %a = load <8 x i8>, <8 x i8>* %a.addr, align 8
  %b = load <8 x i8>, <8 x i8>* %b.addr, align 8
  %ext = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
  store <8 x i8> %ext, <8 x i8>* %v.addr, align 8
  ret void
}

; <8 x i8>, mask 3..10; expects an ext with immediate #3.
define void @test_vext_p8() nounwind ssp {
  ; CHECK-LABEL: test_vext_p8:
  ; CHECK: {{ext.8.*#3}}
  %v.addr = alloca <8 x i8>, align 8
  %a.addr = alloca <8 x i8>, align 8
  %b.addr = alloca <8 x i8>, align 8
  %v.first = load <8 x i8>, <8 x i8>* %v.addr, align 8
  store <8 x i8> %v.first, <8 x i8>* %a.addr, align 8
  %v.second = load <8 x i8>, <8 x i8>* %v.addr, align 8
  store <8 x i8> %v.second, <8 x i8>* %b.addr, align 8
  %a = load <8 x i8>, <8 x i8>* %a.addr, align 8
  %b = load <8 x i8>, <8 x i8>* %b.addr, align 8
  %ext = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
  store <8 x i8> %ext, <8 x i8>* %v.addr, align 8
  ret void
}

; <4 x i16>, mask 1..4 (one 16-bit lane = 2 bytes); expects immediate #2.
define void @test_vext_s16() nounwind ssp {
  ; CHECK-LABEL: test_vext_s16:
  ; CHECK: {{ext.8.*#2}}
  %v.addr = alloca <4 x i16>, align 8
  %a.addr = alloca <4 x i16>, align 8
  %b.addr = alloca <4 x i16>, align 8
  %v.first = load <4 x i16>, <4 x i16>* %v.addr, align 8
  store <4 x i16> %v.first, <4 x i16>* %a.addr, align 8
  %v.second = load <4 x i16>, <4 x i16>* %v.addr, align 8
  store <4 x i16> %v.second, <4 x i16>* %b.addr, align 8
  %a = load <4 x i16>, <4 x i16>* %a.addr, align 8
  %a.bytes = bitcast <4 x i16> %a to <8 x i8>
  %b = load <4 x i16>, <4 x i16>* %b.addr, align 8
  %b.bytes = bitcast <4 x i16> %b to <8 x i8>
  %a.vec = bitcast <8 x i8> %a.bytes to <4 x i16>
  %b.vec = bitcast <8 x i8> %b.bytes to <4 x i16>
  %ext = shufflevector <4 x i16> %a.vec, <4 x i16> %b.vec, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  store <4 x i16> %ext, <4 x i16>* %v.addr, align 8
  ret void
}

; <4 x i16>, mask 2..5 (two 16-bit lanes = 4 bytes); expects immediate #4.
define void @test_vext_u16() nounwind ssp {
  ; CHECK-LABEL: test_vext_u16:
  ; CHECK: {{ext.8.*#4}}
  %v.addr = alloca <4 x i16>, align 8
  %a.addr = alloca <4 x i16>, align 8
  %b.addr = alloca <4 x i16>, align 8
  %v.first = load <4 x i16>, <4 x i16>* %v.addr, align 8
  store <4 x i16> %v.first, <4 x i16>* %a.addr, align 8
  %v.second = load <4 x i16>, <4 x i16>* %v.addr, align 8
  store <4 x i16> %v.second, <4 x i16>* %b.addr, align 8
  %a = load <4 x i16>, <4 x i16>* %a.addr, align 8
  %a.bytes = bitcast <4 x i16> %a to <8 x i8>
  %b = load <4 x i16>, <4 x i16>* %b.addr, align 8
  %b.bytes = bitcast <4 x i16> %b to <8 x i8>
  %a.vec = bitcast <8 x i8> %a.bytes to <4 x i16>
  %b.vec = bitcast <8 x i8> %b.bytes to <4 x i16>
  %ext = shufflevector <4 x i16> %a.vec, <4 x i16> %b.vec, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  store <4 x i16> %ext, <4 x i16>* %v.addr, align 8
  ret void
}

; <4 x i16>, mask 3..6 (three 16-bit lanes = 6 bytes); expects immediate #6.
define void @test_vext_p16() nounwind ssp {
  ; CHECK-LABEL: test_vext_p16:
  ; CHECK: {{ext.8.*#6}}
  %v.addr = alloca <4 x i16>, align 8
  %a.addr = alloca <4 x i16>, align 8
  %b.addr = alloca <4 x i16>, align 8
  %v.first = load <4 x i16>, <4 x i16>* %v.addr, align 8
  store <4 x i16> %v.first, <4 x i16>* %a.addr, align 8
  %v.second = load <4 x i16>, <4 x i16>* %v.addr, align 8
  store <4 x i16> %v.second, <4 x i16>* %b.addr, align 8
  %a = load <4 x i16>, <4 x i16>* %a.addr, align 8
  %a.bytes = bitcast <4 x i16> %a to <8 x i8>
  %b = load <4 x i16>, <4 x i16>* %b.addr, align 8
  %b.bytes = bitcast <4 x i16> %b to <8 x i8>
  %a.vec = bitcast <8 x i8> %a.bytes to <4 x i16>
  %b.vec = bitcast <8 x i8> %b.bytes to <4 x i16>
  %ext = shufflevector <4 x i16> %a.vec, <4 x i16> %b.vec, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
  store <4 x i16> %ext, <4 x i16>* %v.addr, align 8
  ret void
}

; <2 x i32>, mask <1, 2>; the test expects rev64.2s here instead of an ext.
; NOTE(review): presumably because both operands are copies of the same slot,
; which makes the mask equivalent to a lane swap -- confirm against the
; AArch64 shuffle lowering.
define void @test_vext_s32() nounwind ssp {
  ; CHECK-LABEL: test_vext_s32:
  ; CHECK: {{rev64.2s.*}}
  %v.addr = alloca <2 x i32>, align 8
  %a.addr = alloca <2 x i32>, align 8
  %b.addr = alloca <2 x i32>, align 8
  %v.first = load <2 x i32>, <2 x i32>* %v.addr, align 8
  store <2 x i32> %v.first, <2 x i32>* %a.addr, align 8
  %v.second = load <2 x i32>, <2 x i32>* %v.addr, align 8
  store <2 x i32> %v.second, <2 x i32>* %b.addr, align 8
  %a = load <2 x i32>, <2 x i32>* %a.addr, align 8
  %a.bytes = bitcast <2 x i32> %a to <8 x i8>
  %b = load <2 x i32>, <2 x i32>* %b.addr, align 8
  %b.bytes = bitcast <2 x i32> %b to <8 x i8>
  %a.vec = bitcast <8 x i8> %a.bytes to <2 x i32>
  %b.vec = bitcast <8 x i8> %b.bytes to <2 x i32>
  %ext = shufflevector <2 x i32> %a.vec, <2 x i32> %b.vec, <2 x i32> <i32 1, i32 2>
  store <2 x i32> %ext, <2 x i32>* %v.addr, align 8
  ret void
}

; <2 x i32>, mask <1, 2>; same expectation as test_vext_s32 (rev64.2s).
define void @test_vext_u32() nounwind ssp {
  ; CHECK-LABEL: test_vext_u32:
  ; CHECK: {{rev64.2s.*}}
  %v.addr = alloca <2 x i32>, align 8
  %a.addr = alloca <2 x i32>, align 8
  %b.addr = alloca <2 x i32>, align 8
  %v.first = load <2 x i32>, <2 x i32>* %v.addr, align 8
  store <2 x i32> %v.first, <2 x i32>* %a.addr, align 8
  %v.second = load <2 x i32>, <2 x i32>* %v.addr, align 8
  store <2 x i32> %v.second, <2 x i32>* %b.addr, align 8
  %a = load <2 x i32>, <2 x i32>* %a.addr, align 8
  %a.bytes = bitcast <2 x i32> %a to <8 x i8>
  %b = load <2 x i32>, <2 x i32>* %b.addr, align 8
  %b.bytes = bitcast <2 x i32> %b to <8 x i8>
  %a.vec = bitcast <8 x i8> %a.bytes to <2 x i32>
  %b.vec = bitcast <8 x i8> %b.bytes to <2 x i32>
  %ext = shufflevector <2 x i32> %a.vec, <2 x i32> %b.vec, <2 x i32> <i32 1, i32 2>
  store <2 x i32> %ext, <2 x i32>* %v.addr, align 8
  ret void
}

; <2 x float>, mask <1, 2>; same expectation as the i32 variants (rev64.2s).
define void @test_vext_f32() nounwind ssp {
  ; CHECK-LABEL: test_vext_f32:
  ; CHECK: {{rev64.2s.*}}
  %v.addr = alloca <2 x float>, align 8
  %a.addr = alloca <2 x float>, align 8
  %b.addr = alloca <2 x float>, align 8
  %v.first = load <2 x float>, <2 x float>* %v.addr, align 8
  store <2 x float> %v.first, <2 x float>* %a.addr, align 8
  %v.second = load <2 x float>, <2 x float>* %v.addr, align 8
  store <2 x float> %v.second, <2 x float>* %b.addr, align 8
  %a = load <2 x float>, <2 x float>* %a.addr, align 8
  %a.bytes = bitcast <2 x float> %a to <8 x i8>
  %b = load <2 x float>, <2 x float>* %b.addr, align 8
  %b.bytes = bitcast <2 x float> %b to <8 x i8>
  %a.vec = bitcast <8 x i8> %a.bytes to <2 x float>
  %b.vec = bitcast <8 x i8> %b.bytes to <2 x float>
  %ext = shufflevector <2 x float> %a.vec, <2 x float> %b.vec, <2 x i32> <i32 1, i32 2>
  store <2 x float> %ext, <2 x float>* %v.addr, align 8
  ret void
}

; <1 x i64>, mask <1>: the single result lane is lane 0 of the second operand.
; Only the label is matched; the instruction expectation is parked under a
; non-default prefix (see the note inside the body).
define void @test_vext_s64() nounwind ssp {
  ; CHECK-LABEL: test_vext_s64:
  ; CHECK_FIXME: {{rev64.2s.*}}
  ; this just turns into a load of the second element
  %v.addr = alloca <1 x i64>, align 8
  %a.addr = alloca <1 x i64>, align 8
  %b.addr = alloca <1 x i64>, align 8
  %v.first = load <1 x i64>, <1 x i64>* %v.addr, align 8
  store <1 x i64> %v.first, <1 x i64>* %a.addr, align 8
  %v.second = load <1 x i64>, <1 x i64>* %v.addr, align 8
  store <1 x i64> %v.second, <1 x i64>* %b.addr, align 8
  %a = load <1 x i64>, <1 x i64>* %a.addr, align 8
  %a.bytes = bitcast <1 x i64> %a to <8 x i8>
  %b = load <1 x i64>, <1 x i64>* %b.addr, align 8
  %b.bytes = bitcast <1 x i64> %b to <8 x i8>
  %a.vec = bitcast <8 x i8> %a.bytes to <1 x i64>
  %b.vec = bitcast <8 x i8> %b.bytes to <1 x i64>
  %ext = shufflevector <1 x i64> %a.vec, <1 x i64> %b.vec, <1 x i32> <i32 1>
  store <1 x i64> %ext, <1 x i64>* %v.addr, align 8
  ret void
}

; <1 x i64>, mask <1>: same shape as test_vext_s64; again only the label is
; matched and the instruction expectation is parked.
define void @test_vext_u64() nounwind ssp {
  ; CHECK-LABEL: test_vext_u64:
  ; CHECK_FIXME: {{ext.8.*#1}}
  ; this is turned into a simple load of the 2nd element
  %v.addr = alloca <1 x i64>, align 8
  %a.addr = alloca <1 x i64>, align 8
  %b.addr = alloca <1 x i64>, align 8
  %v.first = load <1 x i64>, <1 x i64>* %v.addr, align 8
  store <1 x i64> %v.first, <1 x i64>* %a.addr, align 8
  %v.second = load <1 x i64>, <1 x i64>* %v.addr, align 8
  store <1 x i64> %v.second, <1 x i64>* %b.addr, align 8
  %a = load <1 x i64>, <1 x i64>* %a.addr, align 8
  %a.bytes = bitcast <1 x i64> %a to <8 x i8>
  %b = load <1 x i64>, <1 x i64>* %b.addr, align 8
  %b.bytes = bitcast <1 x i64> %b to <8 x i8>
  %a.vec = bitcast <8 x i8> %a.bytes to <1 x i64>
  %b.vec = bitcast <8 x i8> %b.bytes to <1 x i64>
  %ext = shufflevector <1 x i64> %a.vec, <1 x i64> %b.vec, <1 x i32> <i32 1>
  store <1 x i64> %ext, <1 x i64>* %v.addr, align 8
  ret void
}

; <16 x i8>, mask 4..19; expects an ext with immediate #4.
define void @test_vextq_s8() nounwind ssp {
  ; CHECK-LABEL: test_vextq_s8:
  ; CHECK: {{ext.16.*#4}}
  %v.addr = alloca <16 x i8>, align 16
  %a.addr = alloca <16 x i8>, align 16
  %b.addr = alloca <16 x i8>, align 16
  %v.first = load <16 x i8>, <16 x i8>* %v.addr, align 16
  store <16 x i8> %v.first, <16 x i8>* %a.addr, align 16
  %v.second = load <16 x i8>, <16 x i8>* %v.addr, align 16
  store <16 x i8> %v.second, <16 x i8>* %b.addr, align 16
  %a = load <16 x i8>, <16 x i8>* %a.addr, align 16
  %b = load <16 x i8>, <16 x i8>* %b.addr, align 16
  %ext = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
  store <16 x i8> %ext, <16 x i8>* %v.addr, align 16
  ret void
}

; <16 x i8>, mask 5..20; expects an ext with immediate #5.
define void @test_vextq_u8() nounwind ssp {
  ; CHECK-LABEL: test_vextq_u8:
  ; CHECK: {{ext.16.*#5}}
  %v.addr = alloca <16 x i8>, align 16
  %a.addr = alloca <16 x i8>, align 16
  %b.addr = alloca <16 x i8>, align 16
  %v.first = load <16 x i8>, <16 x i8>* %v.addr, align 16
  store <16 x i8> %v.first, <16 x i8>* %a.addr, align 16
  %v.second = load <16 x i8>, <16 x i8>* %v.addr, align 16
  store <16 x i8> %v.second, <16 x i8>* %b.addr, align 16
  %a = load <16 x i8>, <16 x i8>* %a.addr, align 16
  %b = load <16 x i8>, <16 x i8>* %b.addr, align 16
  %ext = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  store <16 x i8> %ext, <16 x i8>* %v.addr, align 16
  ret void
}

; <16 x i8>, mask 6..21; expects an ext with immediate #6.
define void @test_vextq_p8() nounwind ssp {
  ; CHECK-LABEL: test_vextq_p8:
  ; CHECK: {{ext.16.*#6}}
  %v.addr = alloca <16 x i8>, align 16
  %a.addr = alloca <16 x i8>, align 16
  %b.addr = alloca <16 x i8>, align 16
  %v.first = load <16 x i8>, <16 x i8>* %v.addr, align 16
  store <16 x i8> %v.first, <16 x i8>* %a.addr, align 16
  %v.second = load <16 x i8>, <16 x i8>* %v.addr, align 16
  store <16 x i8> %v.second, <16 x i8>* %b.addr, align 16
  %a = load <16 x i8>, <16 x i8>* %a.addr, align 16
  %b = load <16 x i8>, <16 x i8>* %b.addr, align 16
  %ext = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21>
  store <16 x i8> %ext, <16 x i8>* %v.addr, align 16
  ret void
}

; <8 x i16>, mask 7..14 (seven 16-bit lanes = 14 bytes); expects immediate #14.
define void @test_vextq_s16() nounwind ssp {
  ; CHECK-LABEL: test_vextq_s16:
  ; CHECK: {{ext.16.*#14}}
  %v.addr = alloca <8 x i16>, align 16
  %a.addr = alloca <8 x i16>, align 16
  %b.addr = alloca <8 x i16>, align 16
  %v.first = load <8 x i16>, <8 x i16>* %v.addr, align 16
  store <8 x i16> %v.first, <8 x i16>* %a.addr, align 16
  %v.second = load <8 x i16>, <8 x i16>* %v.addr, align 16
  store <8 x i16> %v.second, <8 x i16>* %b.addr, align 16
  %a = load <8 x i16>, <8 x i16>* %a.addr, align 16
  %a.bytes = bitcast <8 x i16> %a to <16 x i8>
  %b = load <8 x i16>, <8 x i16>* %b.addr, align 16
  %b.bytes = bitcast <8 x i16> %b to <16 x i8>
  %a.vec = bitcast <16 x i8> %a.bytes to <8 x i16>
  %b.vec = bitcast <16 x i8> %b.bytes to <8 x i16>
  %ext = shufflevector <8 x i16> %a.vec, <8 x i16> %b.vec, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
  store <8 x i16> %ext, <8 x i16>* %v.addr, align 16
  ret void
}

; <8 x i16>, mask 4..11 (four 16-bit lanes = 8 bytes); expects immediate #8.
define void @test_vextq_u16() nounwind ssp {
  ; CHECK-LABEL: test_vextq_u16:
  ; CHECK: {{ext.16.*#8}}
  %v.addr = alloca <8 x i16>, align 16
  %a.addr = alloca <8 x i16>, align 16
  %b.addr = alloca <8 x i16>, align 16
  %v.first = load <8 x i16>, <8 x i16>* %v.addr, align 16
  store <8 x i16> %v.first, <8 x i16>* %a.addr, align 16
  %v.second = load <8 x i16>, <8 x i16>* %v.addr, align 16
  store <8 x i16> %v.second, <8 x i16>* %b.addr, align 16
  %a = load <8 x i16>, <8 x i16>* %a.addr, align 16
  %a.bytes = bitcast <8 x i16> %a to <16 x i8>
  %b = load <8 x i16>, <8 x i16>* %b.addr, align 16
  %b.bytes = bitcast <8 x i16> %b to <16 x i8>
  %a.vec = bitcast <16 x i8> %a.bytes to <8 x i16>
  %b.vec = bitcast <16 x i8> %b.bytes to <8 x i16>
  %ext = shufflevector <8 x i16> %a.vec, <8 x i16> %b.vec, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  store <8 x i16> %ext, <8 x i16>* %v.addr, align 16
  ret void
}

; <8 x i16>, mask 5..12 (five 16-bit lanes = 10 bytes); expects immediate #10.
define void @test_vextq_p16() nounwind ssp {
  ; CHECK-LABEL: test_vextq_p16:
  ; CHECK: {{ext.16.*#10}}
  %v.addr = alloca <8 x i16>, align 16
  %a.addr = alloca <8 x i16>, align 16
  %b.addr = alloca <8 x i16>, align 16
  %v.first = load <8 x i16>, <8 x i16>* %v.addr, align 16
  store <8 x i16> %v.first, <8 x i16>* %a.addr, align 16
  %v.second = load <8 x i16>, <8 x i16>* %v.addr, align 16
  store <8 x i16> %v.second, <8 x i16>* %b.addr, align 16
  %a = load <8 x i16>, <8 x i16>* %a.addr, align 16
  %a.bytes = bitcast <8 x i16> %a to <16 x i8>
  %b = load <8 x i16>, <8 x i16>* %b.addr, align 16
  %b.bytes = bitcast <8 x i16> %b to <16 x i8>
  %a.vec = bitcast <16 x i8> %a.bytes to <8 x i16>
  %b.vec = bitcast <16 x i8> %b.bytes to <8 x i16>
  %ext = shufflevector <8 x i16> %a.vec, <8 x i16> %b.vec, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
  store <8 x i16> %ext, <8 x i16>* %v.addr, align 16
  ret void
}

; <4 x i32>, mask 1..4 (one 32-bit lane = 4 bytes); expects immediate #4.
define void @test_vextq_s32() nounwind ssp {
  ; CHECK-LABEL: test_vextq_s32:
  ; CHECK: {{ext.16.*#4}}
  %v.addr = alloca <4 x i32>, align 16
  %a.addr = alloca <4 x i32>, align 16
  %b.addr = alloca <4 x i32>, align 16
  %v.first = load <4 x i32>, <4 x i32>* %v.addr, align 16
  store <4 x i32> %v.first, <4 x i32>* %a.addr, align 16
  %v.second = load <4 x i32>, <4 x i32>* %v.addr, align 16
  store <4 x i32> %v.second, <4 x i32>* %b.addr, align 16
  %a = load <4 x i32>, <4 x i32>* %a.addr, align 16
  %a.bytes = bitcast <4 x i32> %a to <16 x i8>
  %b = load <4 x i32>, <4 x i32>* %b.addr, align 16
  %b.bytes = bitcast <4 x i32> %b to <16 x i8>
  %a.vec = bitcast <16 x i8> %a.bytes to <4 x i32>
  %b.vec = bitcast <16 x i8> %b.bytes to <4 x i32>
  %ext = shufflevector <4 x i32> %a.vec, <4 x i32> %b.vec, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  store <4 x i32> %ext, <4 x i32>* %v.addr, align 16
  ret void
}

; <4 x i32>, mask 2..5 (two 32-bit lanes = 8 bytes); expects immediate #8.
define void @test_vextq_u32() nounwind ssp {
  ; CHECK-LABEL: test_vextq_u32:
  ; CHECK: {{ext.16.*#8}}
  %v.addr = alloca <4 x i32>, align 16
  %a.addr = alloca <4 x i32>, align 16
  %b.addr = alloca <4 x i32>, align 16
  %v.first = load <4 x i32>, <4 x i32>* %v.addr, align 16
  store <4 x i32> %v.first, <4 x i32>* %a.addr, align 16
  %v.second = load <4 x i32>, <4 x i32>* %v.addr, align 16
  store <4 x i32> %v.second, <4 x i32>* %b.addr, align 16
  %a = load <4 x i32>, <4 x i32>* %a.addr, align 16
  %a.bytes = bitcast <4 x i32> %a to <16 x i8>
  %b = load <4 x i32>, <4 x i32>* %b.addr, align 16
  %b.bytes = bitcast <4 x i32> %b to <16 x i8>
  %a.vec = bitcast <16 x i8> %a.bytes to <4 x i32>
  %b.vec = bitcast <16 x i8> %b.bytes to <4 x i32>
  %ext = shufflevector <4 x i32> %a.vec, <4 x i32> %b.vec, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  store <4 x i32> %ext, <4 x i32>* %v.addr, align 16
  ret void
}

; <4 x float>, mask 3..6 (three 32-bit lanes = 12 bytes); expects immediate #12.
define void @test_vextq_f32() nounwind ssp {
  ; CHECK-LABEL: test_vextq_f32:
  ; CHECK: {{ext.16.*#12}}
  %v.addr = alloca <4 x float>, align 16
  %a.addr = alloca <4 x float>, align 16
  %b.addr = alloca <4 x float>, align 16
  %v.first = load <4 x float>, <4 x float>* %v.addr, align 16
  store <4 x float> %v.first, <4 x float>* %a.addr, align 16
  %v.second = load <4 x float>, <4 x float>* %v.addr, align 16
  store <4 x float> %v.second, <4 x float>* %b.addr, align 16
  %a = load <4 x float>, <4 x float>* %a.addr, align 16
  %a.bytes = bitcast <4 x float> %a to <16 x i8>
  %b = load <4 x float>, <4 x float>* %b.addr, align 16
  %b.bytes = bitcast <4 x float> %b to <16 x i8>
  %a.vec = bitcast <16 x i8> %a.bytes to <4 x float>
  %b.vec = bitcast <16 x i8> %b.bytes to <4 x float>
  %ext = shufflevector <4 x float> %a.vec, <4 x float> %b.vec, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
  store <4 x float> %ext, <4 x float>* %v.addr, align 16
  ret void
}

; <2 x i64>, mask <1, 2> (one 64-bit lane = 8 bytes); expects immediate #8.
define void @test_vextq_s64() nounwind ssp {
  ; CHECK-LABEL: test_vextq_s64:
  ; CHECK: {{ext.16.*#8}}
  %v.addr = alloca <2 x i64>, align 16
  %a.addr = alloca <2 x i64>, align 16
  %b.addr = alloca <2 x i64>, align 16
  %v.first = load <2 x i64>, <2 x i64>* %v.addr, align 16
  store <2 x i64> %v.first, <2 x i64>* %a.addr, align 16
  %v.second = load <2 x i64>, <2 x i64>* %v.addr, align 16
  store <2 x i64> %v.second, <2 x i64>* %b.addr, align 16
  %a = load <2 x i64>, <2 x i64>* %a.addr, align 16
  %a.bytes = bitcast <2 x i64> %a to <16 x i8>
  %b = load <2 x i64>, <2 x i64>* %b.addr, align 16
  %b.bytes = bitcast <2 x i64> %b to <16 x i8>
  %a.vec = bitcast <16 x i8> %a.bytes to <2 x i64>
  %b.vec = bitcast <16 x i8> %b.bytes to <2 x i64>
  %ext = shufflevector <2 x i64> %a.vec, <2 x i64> %b.vec, <2 x i32> <i32 1, i32 2>
  store <2 x i64> %ext, <2 x i64>* %v.addr, align 16
  ret void
}

; <2 x i64>, mask <1, 2> (one 64-bit lane = 8 bytes); expects immediate #8.
define void @test_vextq_u64() nounwind ssp {
  ; CHECK-LABEL: test_vextq_u64:
  ; CHECK: {{ext.16.*#8}}
  %v.addr = alloca <2 x i64>, align 16
  %a.addr = alloca <2 x i64>, align 16
  %b.addr = alloca <2 x i64>, align 16
  %v.first = load <2 x i64>, <2 x i64>* %v.addr, align 16
  store <2 x i64> %v.first, <2 x i64>* %a.addr, align 16
  %v.second = load <2 x i64>, <2 x i64>* %v.addr, align 16
  store <2 x i64> %v.second, <2 x i64>* %b.addr, align 16
  %a = load <2 x i64>, <2 x i64>* %a.addr, align 16
  %a.bytes = bitcast <2 x i64> %a to <16 x i8>
  %b = load <2 x i64>, <2 x i64>* %b.addr, align 16
  %b.bytes = bitcast <2 x i64> %b to <16 x i8>
  %a.vec = bitcast <16 x i8> %a.bytes to <2 x i64>
  %b.vec = bitcast <16 x i8> %b.bytes to <2 x i64>
  %ext = shufflevector <2 x i64> %a.vec, <2 x i64> %b.vec, <2 x i32> <i32 1, i32 2>
  store <2 x i64> %ext, <2 x i64>* %v.addr, align 16
  ret void
}

; shuffles with an undef second operand can use an EXT also so long as the
; indices wrap and stay sequential.
; rdar://12051674
; vext1: one <16 x i8> input shuffled against undef with mask 8..15, 0..7
; (the indices wrap around the first operand); the test expects a single
; ext.16b of v0 with itself at immediate #8.
define <16 x i8> @vext1(<16 x i8> %_a) nounwind {
  ; CHECK-LABEL: vext1:
  ; CHECK: ext.16b v0, v0, v0, #8
  %rotated = shufflevector <16 x i8> %_a, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %rotated
}

; <rdar://problem/12212062>
; vext2: each <2 x i64> argument has its two lanes swapped (mask <1, 0> against
; undef) and the swapped vectors are added. The test expects the swap of %p1
; (v1) first, then the swap of %p0 (v0), each as an ext.16b at #8, followed by
; add.2d.
define <2 x i64> @vext2(<2 x i64> %p0, <2 x i64> %p1) nounwind readnone ssp {
entry:
  ; CHECK-LABEL: vext2:
  ; CHECK: ext.16b v1, v1, v1, #8
  ; CHECK: ext.16b v0, v0, v0, #8
  ; CHECK: add.2d v0, v0, v1
  %p1.swapped = shufflevector <2 x i64> %p1, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
  %p0.swapped = shufflevector <2 x i64> %p0, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
  %sum = add <2 x i64> %p0.swapped, %p1.swapped
  ret <2 x i64> %sum
}