; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s

; Tests for matching shufflevector masks to the NEON vtrn (vector transpose)
; instruction. Naming used below:
;   vtrn<T>        - 64-bit (D register) inputs, two shuffles added together.
;   vtrnQ<T>       - 128-bit (Q register) inputs, two shuffles added together.
;   *_Qres/_QQres  - one shuffle whose result is twice as wide as each input;
;                    its mask is the even-lane pattern followed by the
;                    odd-lane pattern.
;   *_undef*       - the same masks with some lanes replaced by undef.

; Even-lane and odd-lane interleave of two <8 x i8> vectors; the checks expect
; a single vtrn.8 feeding the add.
define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vtrni8:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
; CHECK-NEXT: vtrn.8 d17, d16
; CHECK-NEXT: vadd.i8 d16, d17, d16
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

; Same two patterns concatenated in one <16 x i32> mask; the checks expect one
; vtrn.8 with both of its registers returned.
define <16 x i8> @vtrni8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vtrni8_Qres:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT: vtrn.8 [[LDR0]], [[LDR1]]
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14, i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <16 x i8> %tmp3
}

; <4 x i16> version of vtrni8; expects vtrn.16.
define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vtrni16:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
; CHECK-NEXT: vtrn.16 d17, d16
; CHECK-NEXT: vadd.i16 d16, d17, d16
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

; <4 x i16> version of vtrni8_Qres; expects vtrn.16.
define <8 x i16> @vtrni16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vtrni16_Qres:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT: vtrn.16 [[LDR0]], [[LDR1]]
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 4, i32 2, i32 6, i32 1, i32 5, i32 3, i32 7>
  ret <8 x i16> %tmp3
}

; <2 x i32> version of vtrni8; expects vtrn.32.
define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: vtrni32:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
; CHECK-NEXT: vtrn.32 d17, d16
; CHECK-NEXT: vadd.i32 d16, d17, d16
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 0, i32 2>
  %tmp4 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 3>
  %tmp5 = add <2 x i32> %tmp3, %tmp4
  ret <2 x i32> %tmp5
}

; <2 x i32> version of vtrni8_Qres; expects vtrn.32.
define <4 x i32> @vtrni32_Qres(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: vtrni32_Qres:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT: vtrn.32 [[LDR0]], [[LDR1]]
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x i32> %tmp3
}

; <2 x float> version of vtrni32; the combining operation is fadd.
define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
; CHECK-LABEL: vtrnf:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
; CHECK-NEXT: vtrn.32 d17, d16
; CHECK-NEXT: vadd.f32 d16, d17, d16
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 0, i32 2>
  %tmp4 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 1, i32 3>
  %tmp5 = fadd <2 x float> %tmp3, %tmp4
  ret <2 x float> %tmp5
}

; <2 x float> version of vtrni32_Qres.
define <4 x float> @vtrnf_Qres(<2 x float>* %A, <2 x float>* %B) nounwind {
; CHECK-LABEL: vtrnf_Qres:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT: vtrn.32 [[LDR0]], [[LDR1]]
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x float> %tmp3
}

; 128-bit inputs: even/odd interleave of two <16 x i8> vectors; expects a
; single vtrn.8 on Q registers.
define <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vtrnQi8:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
; CHECK-NEXT: vtrn.8 q9, q8
; CHECK-NEXT: vadd.i8 q8, q9, q8
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
  %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

; Double-width result of the Q-register transpose. The checks expect the
; inputs to be loaded through r1/r2 and both halves stored through r0.
define <32 x i8> @vtrnQi8_QQres(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vtrnQi8_QQres:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vtrn.8 q9, q8
; CHECK-NEXT: vst1.8 {d18, d19}, [r0:128]!
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <32 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30, i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
  ret <32 x i8> %tmp3
}

; <8 x i16> version of vtrnQi8; expects vtrn.16.
define <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vtrnQi16:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
; CHECK-NEXT: vtrn.16 q9, q8
; CHECK-NEXT: vadd.i16 q8, q9, q8
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

; <8 x i16> version of vtrnQi8_QQres; expects vtrn.16.
define <16 x i16> @vtrnQi16_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vtrnQi16_QQres:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vtrn.16 q9, q8
; CHECK-NEXT: vst1.16 {d18, d19}, [r0:128]!
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14, i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <16 x i16> %tmp3
}

; <4 x i32> version of vtrnQi8; expects vtrn.32.
define <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: vtrnQi32:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
; CHECK-NEXT: vtrn.32 q9, q8
; CHECK-NEXT: vadd.i32 q8, q9, q8
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

; <4 x i32> version of vtrnQi8_QQres; expects vtrn.32.
define <8 x i32> @vtrnQi32_QQres(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: vtrnQi32_QQres:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vtrn.32 q9, q8
; CHECK-NEXT: vst1.32 {d18, d19}, [r0:128]!
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <8 x i32> <i32 0, i32 4, i32 2, i32 6, i32 1, i32 5, i32 3, i32 7>
  ret <8 x i32> %tmp3
}

; <4 x float> version of vtrnQi32; the combining operation is fadd.
define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: vtrnQf:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
; CHECK-NEXT: vtrn.32 q9, q8
; CHECK-NEXT: vadd.f32 q8, q9, q8
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %tmp5 = fadd <4 x float> %tmp3, %tmp4
  ret <4 x float> %tmp5
}

; <4 x float> version of vtrnQi32_QQres.
define <8 x float> @vtrnQf_QQres(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: vtrnQf_QQres:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vtrn.32 q9, q8
; CHECK-NEXT: vst1.32 {d18, d19}, [r0:128]!
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <8 x i32> <i32 0, i32 4, i32 2, i32 6, i32 1, i32 5, i32 3, i32 7>
  ret <8 x float> %tmp3
}


; vtrni8 with some mask lanes replaced by undef; the checks still expect a
; single vtrn.8.
define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vtrni8_undef:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vldr d17, [r0]
; CHECK-NEXT: vtrn.8 d17, d16
; CHECK-NEXT: vadd.i8 d16, d17, d16
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14>
  %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15>
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

; vtrni8_Qres with some mask lanes replaced by undef.
define <16 x i8> @vtrni8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vtrni8_undef_Qres:
; CHECK: @ BB#0:
; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
; CHECK-NEXT: vtrn.8 [[LDR0]], [[LDR1]]
; CHECK-NEXT: vmov r0, r1, [[LDR0]]
; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14, i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15>
  ret <16 x i8> %tmp3
}

; vtrnQi16 with some mask lanes replaced by undef.
define <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vtrnQi16_undef:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
; CHECK-NEXT: vtrn.16 q9, q8
; CHECK-NEXT: vadd.i16 q8, q9, q8
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14>
  %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

; vtrnQi16_QQres with some mask lanes replaced by undef.
define <16 x i16> @vtrnQi16_undef_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vtrnQi16_undef_QQres:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vtrn.16 q9, q8
; CHECK-NEXT: vst1.16 {d18, d19}, [r0:128]!
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14, i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
  ret <16 x i16> %tmp3
}

; The low half of the mask is entirely undef and the high half is the
; odd-lane pattern; a vtrn is still expected. The label is matched with its
; trailing colon, like the other tests, so it anchors on the function label.
define <8 x i16> @vtrn_lower_shufflemask_undef(<4 x i16>* %A, <4 x i16>* %B) {
entry:
; CHECK-LABEL: vtrn_lower_shufflemask_undef:
; CHECK: vtrn
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 5, i32 3, i32 7>
  ret <8 x i16> %0
}

; Here we get a build_vector node, where all the incoming extract_element
; values do modify the type.
; However, we get different input types, as some of
; them get truncated from i32 to i8 (from comparing cmp0 with cmp1) and some of
; them get truncated from i16 to i8 (from comparing cmp2 with cmp3).
define <8 x i8> @vtrn_mismatched_builvector0(<8 x i8> %tr0, <8 x i8> %tr1,
                                             <4 x i32> %cmp0, <4 x i32> %cmp1,
                                             <4 x i16> %cmp2, <4 x i16> %cmp3) {
; CHECK-LABEL: vtrn_mismatched_builvector0:
; CHECK: vmovn.i32
; CHECK: vtrn
; CHECK: vbsl
  %c0 = icmp ult <4 x i32> %cmp0, %cmp1
  %c1 = icmp ult <4 x i16> %cmp2, %cmp3
  %c = shufflevector <4 x i1> %c0, <4 x i1> %c1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
  ret <8 x i8> %rv
}

; Here we get a build_vector node, where half the incoming extract_element
; values do not modify the type (the values from cmp2), but half of them do
; (from the icmp operation).
define <8 x i8> @vtrn_mismatched_builvector1(<8 x i8> %tr0, <8 x i8> %tr1,
                           <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {
; CHECK-LABEL: vtrn_mismatched_builvector1:
; We need to extend the 4 x i8 to 4 x i16 in order to perform the vtrn
; CHECK: vmovl
; CHECK: vtrn.8
; CHECK: vbsl
  %cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
  %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
  %c0 = icmp ult <4 x i32> %cmp0, %cmp1
  %c = shufflevector <4 x i1> %c0, <4 x i1> %cmp2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
  ret <8 x i8> %rv
}

; Negative test that should not generate a vtrn.
; Both halves of the mask are compatible with the odd-lane pattern
; (1, 5, 3, 7), so the two halves are not the two distinct results of one
; transpose.
define void @lower_twice_no_vtrn(<4 x i16>* %A, <4 x i16>* %B, <8 x i16>* %C) {
entry:
; CHECK-LABEL: lower_twice_no_vtrn:
; CHECK: @ BB#0:
; CHECK-NOT: vtrn
; CHECK: mov pc, lr
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 5, i32 3, i32 7, i32 1, i32 5, i32 3, i32 7>
  store <8 x i16> %0, <8 x i16>* %C
  ret void
}

; Negative test that should not generate a vtrn.
; Both halves of the mask are compatible with the even-lane pattern
; (0, 4, 2, 6), so the two halves are not the two distinct results of one
; transpose.
define void @upper_twice_no_vtrn(<4 x i16>* %A, <4 x i16>* %B, <8 x i16>* %C) {
entry:
; CHECK-LABEL: upper_twice_no_vtrn:
; CHECK: @ BB#0:
; CHECK-NOT: vtrn
; CHECK: mov pc, lr
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 6, i32 0, i32 4, i32 2, i32 6>
  store <8 x i16> %0, <8 x i16>* %C
  ret void
}