; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s

; Verifies which MSA instruction llc emits for a given shufflevector mask, on
; both the big-endian (mips) and little-endian (mipsel) targets.
;
; In the expected output the destination pointer %c is in $4 and the source
; pointers %a and %b are in $5 and $6. Each function name is matched with a
; label directive so that the order-independent (DAG) checks that follow can
; only match inside that function's own output.

; Single input, full byte reversal: the expected code loads a second vector
; through a %lo($...) address (presumably the shuffle control vector in the
; constant pool) and uses the input register for both vshf.b sources.
define void @vshf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: vshf_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R1]]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v16i8_0
}

; Single input, every result element is element 1: expected to become a
; splati.b of index 1 rather than a vshf.b.
define void @vshf_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: vshf_v16i8_1:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v16i8_1
}

; Two inputs, but the mask (16..30, then 16) only selects elements of %b, so
; only the load of %b is checked and its register is expected for both vshf.b
; sources.
define void @vshf_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: vshf_v16i8_2:

  %1 = load <16 x i8>* %a
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 16>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; CHECK-DAG: vshf.b [[R3]], [[R2]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v16i8_2
}

; The functions below check which MSA instruction llc emits for a given
; shufflevector mask. In the expected output the destination pointer %c is in
; $4 and the source pointers %a and %b are in $5 and $6. Each function name is
; matched with a label directive so that the order-independent (DAG) checks
; that follow can only match inside that function's own output.

; Two inputs with a mask that draws from both of them: a general vshf.b with a
; control vector loaded through a %lo($...) address.
define void @vshf_v16i8_3(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: vshf_v16i8_3:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 17, i32 24, i32 25, i32 18, i32 19, i32 20, i32 28, i32 19, i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
  ; the operands to get the right answer.
  ; CHECK-DAG: vshf.b [[R3]], [[R2]], [[R1]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v16i8_3
}

; Both shuffle operands are the same value and the mask alternates 1 and 17,
; i.e. element 1 of that value every time: expected to become splati.b.
define void @vshf_v16i8_4(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: vshf_v16i8_4:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <16 x i8> %1, <16 x i8> %1, <16 x i32> <i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17>
  ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v16i8_4
}

; Single input, full halfword reversal: vshf.h with the input register used
; for both sources.
define void @vshf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: vshf_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R1]]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v8i16_0
}

; Single input, every result element is element 1: expected to become
; splati.h of index 1.
define void @vshf_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: vshf_v8i16_1:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v8i16_1
}

; Two inputs, but the mask (8..14, then 8) only selects elements of %b, so only
; the load of %b is checked and its register feeds both vshf.h sources.
define void @vshf_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: vshf_v8i16_2:

  %1 = load <8 x i16>* %a
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 8>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; CHECK-DAG: vshf.h [[R3]], [[R2]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v8i16_2
}

; Two inputs with a mask that draws from both of them: a general vshf.h.
define void @vshf_v8i16_3(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: vshf_v8i16_3:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
  ; the operands to get the right answer.
  ; CHECK-DAG: vshf.h [[R3]], [[R2]], [[R1]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v8i16_3
}

; Both shuffle operands are the same value and the mask alternates 1 and 9,
; i.e. element 1 of that value every time: expected to become splati.h.
define void @vshf_v8i16_4(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: vshf_v8i16_4:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <8 x i16> %1, <8 x i16> %1, <8 x i32> <i32 1, i32 9, i32 1, i32 9, i32 1, i32 9, i32 1, i32 9>
  ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v8i16_4
}

; Note: v4i32 only has one 4-element set so it's impossible to get a vshf.w
; instruction when using a single vector.
;
; The shf immediates expected in this file are consistent with packing the four
; selected indices two bits each, first result element in the lowest bits:
; <3, 2, 1, 0> gives 3 + (2 << 2) + (1 << 4) + (0 << 6) = 27.

; Single input, word reversal <3, 2, 1, 0>: shf.w with immediate 27.
define void @vshf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: vshf_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v4i32_0
}

; Single input, <1, 1, 1, 1>: shf.w with immediate 85 (0b01010101).
define void @vshf_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: vshf_v4i32_1:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v4i32_1
}

; Two inputs, but <4, 5, 6, 4> only selects elements 0, 1, 2, 0 of %b: shf.w on
; %b's register with immediate 36 (0b00100100).
define void @vshf_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: vshf_v4i32_2:

  %1 = load <4 x i32>* %a
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 5, i32 6, i32 4>
  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R2]], 36
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v4i32_2
}

; Two inputs with a mask that draws from both of them: a general vshf.w.
define void @vshf_v4i32_3(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: vshf_v4i32_3:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 6, i32 4>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
  ; the operands to get the right answer.
  ; CHECK-DAG: vshf.w [[R3]], [[R2]], [[R1]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v4i32_3
}

; Both shuffle operands are the same value, so <1, 5, 5, 1> selects element 1
; every time: shf.w with immediate 85.
define void @vshf_v4i32_4(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: vshf_v4i32_4:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <4 x i32> %1, <4 x i32> %1, <4 x i32> <i32 1, i32 5, i32 5, i32 1>
  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v4i32_4
}

; Single input, doubleword swap <1, 0>: vshf.d with the input register used for
; both sources.
define void @vshf_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: vshf_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v2i64_0
}

; Single input, <1, 1>: expected to become splati.d of index 1.
define void @vshf_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: vshf_v2i64_1:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v2i64_1
}

; Two inputs, but <3, 2> only selects elements of %b, so only the load of %b is
; checked and its register feeds both vshf.d sources.
define void @vshf_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: vshf_v2i64_2:

  %1 = load <2 x i64>* %a
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 2>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; CHECK-DAG: vshf.d [[R3]], [[R2]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v2i64_2
}

; Two inputs with a mask that draws from both of them: a general vshf.d.
define void @vshf_v2i64_3(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: vshf_v2i64_3:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 2>
  ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[PTR_A]])
  ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
  ; the operands to get the right answer.
  ; CHECK-DAG: vshf.d [[R3]], [[R2]], [[R1]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v2i64_3
}

; Both shuffle operands are the same value, so <1, 3> selects element 1 twice:
; expected to become splati.d.
define void @vshf_v2i64_4(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: vshf_v2i64_4:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <2 x i64> %1, <2 x i64> %1, <2 x i32> <i32 1, i32 3>
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size vshf_v2i64_4
}

; The same 4-element pattern <1, 3, 2, 0> repeated in every group of four
; bytes: shf.b with immediate 45 (0b00101101).
define void @shf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: shf_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 2, i32 0, i32 5, i32 7, i32 6, i32 4, i32 9, i32 11, i32 10, i32 8, i32 13, i32 15, i32 14, i32 12>
  ; CHECK-DAG: shf.b [[R3:\$w[0-9]+]], [[R1]], 45
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size shf_v16i8_0
}

; <3, 2, 1, 0> repeated in both groups of four halfwords: shf.h with
; immediate 27.
define void @shf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: shf_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  ; CHECK-DAG: shf.h [[R3:\$w[0-9]+]], [[R1]], 27
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size shf_v8i16_0
}

; <3, 2, 1, 0> on words: shf.w with immediate 27.
define void @shf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: shf_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size shf_v4i32_0
}

; shf.d does not exist

; Even-indexed elements of %a and %b, alternating: ilvev.b.
define void @ilvev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: ilvev_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
  ; CHECK-DAG: ilvev.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvev_v16i8_0
}

; Even-indexed elements of %a and %b, alternating: ilvev.h.
define void @ilvev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: ilvev_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvev_v8i16_0
}

; Even-indexed elements of %a and %b, alternating: ilvev.w.
define void @ilvev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: ilvev_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvev_v4i32_0
}

; Element 0 of %a followed by element 0 of %b: ilvev.d.
define void @ilvev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: ilvev_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
  ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvev_v2i64_0
}

; Odd-indexed elements of %a and %b, alternating: ilvod.b.
define void @ilvod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: ilvod_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
  ; CHECK-DAG: ilvod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvod_v16i8_0
}

; Odd-indexed elements of %a and %b, alternating: ilvod.h.
define void @ilvod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: ilvod_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvod_v8i16_0
}

; Odd-indexed elements of %a and %b, alternating: ilvod.w.
define void @ilvod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: ilvod_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvod_v4i32_0
}

; Element 1 of %a followed by element 1 of %b: ilvod.d.
define void @ilvod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: ilvod_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvod_v2i64_0
}

; NOTE(review): the ilvl_* masks below interleave the low-numbered half of each
; input and the ilvr_* masks the high-numbered half. Confirm against the MSA
; specification that this left/right pairing (and the operand order in the
; expected instructions) is the intended one.

; Elements 0..7 of %a and %b, alternating: expected as ilvl.b.
define void @ilvl_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: ilvl_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  ; CHECK-DAG: ilvl.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvl_v16i8_0
}

; Elements 0..3 of %a and %b, alternating: expected as ilvl.h.
define void @ilvl_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: ilvl_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvl_v8i16_0
}

; Elements 0..1 of %a and %b, alternating: expected as ilvl.w.
define void @ilvl_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: ilvl_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvl_v4i32_0
}

; Same <0, 2> mask as ilvev_v2i64_0, so the same instruction is expected.
define void @ilvl_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: ilvl_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
  ; ilvl.d and ilvev.d are equivalent for v2i64
  ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvl_v2i64_0
}

; Elements 8..15 of %a and %b, alternating: expected as ilvr.b.
define void @ilvr_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: ilvr_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  ; CHECK-DAG: ilvr.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvr_v16i8_0
}

; Elements 4..7 of %a and %b, alternating: expected as ilvr.h.
define void @ilvr_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: ilvr_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvr_v8i16_0
}

; Elements 2..3 of %a and %b, alternating: expected as ilvr.w.
define void @ilvr_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: ilvr_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvr_v4i32_0
}

; Same <1, 3> mask as ilvod_v2i64_0, so the same instruction is expected.
define void @ilvr_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: ilvr_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
  ; ilvr.d and ilvod.d are equivalent for v2i64
  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ilvr_v2i64_0
}

; All even-indexed elements of %a followed by all even-indexed elements of %b:
; pckev.b.
define void @pckev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: pckev_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
  ; CHECK-DAG: pckev.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size pckev_v16i8_0
}

; All even-indexed elements of %a followed by all even-indexed elements of %b:
; pckev.h.
define void @pckev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: pckev_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size pckev_v8i16_0
}

; All even-indexed elements of %a followed by all even-indexed elements of %b:
; pckev.w.
define void @pckev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: pckev_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size pckev_v4i32_0
}

; Same <0, 2> mask as ilvev_v2i64_0, so the same instruction is expected.
define void @pckev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: pckev_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
  ; pckev.d and ilvev.d are equivalent for v2i64
  ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size pckev_v2i64_0
}

; All odd-indexed elements of %a followed by all odd-indexed elements of %b:
; pckod.b.
define void @pckod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK-LABEL: pckod_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                     <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
  ; CHECK-DAG: pckod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size pckod_v16i8_0
}

; All odd-indexed elements of %a followed by all odd-indexed elements of %b:
; pckod.h.
define void @pckod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK-LABEL: pckod_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size pckod_v8i16_0
}

; All odd-indexed elements of %a followed by all odd-indexed elements of %b:
; pckod.w.
define void @pckod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK-LABEL: pckod_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size pckod_v4i32_0
}

; Same <1, 3> mask as ilvod_v2i64_0, so the same instruction is expected.
define void @pckod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK-LABEL: pckod_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
  ; pckod.d and ilvod.d are equivalent for v2i64
  ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size pckod_v2i64_0
}

; Broadcast of byte element 4: splati.b with index 4.
define void @splati_v16i8_0(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK-LABEL: splati_v16i8_0:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef,
                     <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][4]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size splati_v16i8_0
}

; Broadcast of halfword element 4: splati.h with index 4.
define void @splati_v8i16_0(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK-LABEL: splati_v8i16_0:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][4]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size splati_v8i16_0
}

; Broadcast of word element 3: expected as shf.w with immediate 255
; (0b11111111, index 3 in all four positions).
define void @splati_v4i32_0(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK-LABEL: splati_v4i32_0:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  ; shf.w and splati.w are equivalent
  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 255
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size splati_v4i32_0
}

; Broadcast of doubleword element 1: splati.d with index 1.
define void @splati_v2i64_0(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK-LABEL: splati_v2i64_0:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size splati_v2i64_0
}