; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s

; Instruction-selection tests for AArch64 vector loads and stores.
; Each function below is compiled by the llc invocation above and the emitted
; assembly is matched against the FileCheck directives placed inside it.
; The expected mnemonics use the Apple NEON spelling (e.g. mul.8b), matching
; the -aarch64-neon-syntax=apple option.

; rdar://9428579

; Single-member wrapper structs around 128-bit and 64-bit vectors.
; NOTE(review): %type3 is declared but not referenced by any function in this
; file as seen here -- confirm whether a matching @t3 test was intended.
%type1 = type { <16 x i8> }
%type2 = type { <8 x i8> }
%type3 = type { <4 x i16> }


; Stores a zero <16 x i8> through the pointer loaded from %argtable.
; Expected: the pointer is loaded from [x0] and q0 is stored to that address.
define hidden fastcc void @t1(%type1** %argtable) nounwind {
entry:
; CHECK-LABEL: t1:
; CHECK: ldr x[[REG:[0-9]+]], [x0]
; CHECK: str q0, [x[[REG]]]
  %tmp1 = load %type1*, %type1** %argtable, align 8
  %tmp2 = getelementptr inbounds %type1, %type1* %tmp1, i64 0, i32 0
  store <16 x i8> zeroinitializer, <16 x i8>* %tmp2, align 16
  ret void
}

; Same as @t1 for a 64-bit vector: the store is expected to use d0.
define hidden fastcc void @t2(%type2** %argtable) nounwind {
entry:
; CHECK-LABEL: t2:
; CHECK: ldr x[[REG:[0-9]+]], [x0]
; CHECK: str d0, [x[[REG]]]
  %tmp1 = load %type2*, %type2** %argtable, align 8
  %tmp2 = getelementptr inbounds %type2, %type2* %tmp1, i64 0, i32 0
  store <8 x i8> zeroinitializer, <8 x i8>* %tmp2, align 8
  ret void
}

; Add a bunch of tests for rdar://11246289
;
; Naming used below:
;   fct1_<elt>x<n>  copies element %offset of %array to element %offset of the
;                   array whose address is held in the matching global. The
;                   offset is expected to be shifted once (lsl #4 for 128-bit
;                   vectors, lsl #3 for 64-bit vectors) and then used as the
;                   register offset of both the load and the store.
;   fct2_<elt>x<n>  copies element 3 of %array to element 5 of the global
;                   array. The accesses are expected to use immediate offsets
;                   (#48/#80 for 128-bit vectors, #24/#40 for 64-bit vectors).

; Global pointers used as the destination arrays of the fct1_*/fct2_* tests.
; NOTE(review): the floatglobalArray* globals are not referenced by any
; function in this file as seen here -- confirm whether tests are missing.
@globalArray64x2 = common global <2 x i64>* null, align 8
@globalArray32x4 = common global <4 x i32>* null, align 8
@globalArray16x8 = common global <8 x i16>* null, align 8
@globalArray8x16 = common global <16 x i8>* null, align 8
@globalArray64x1 = common global <1 x i64>* null, align 8
@globalArray32x2 = common global <2 x i32>* null, align 8
@globalArray16x4 = common global <4 x i16>* null, align 8
@globalArray8x8 = common global <8 x i8>* null, align 8
@floatglobalArray64x2 = common global <2 x double>* null, align 8
@floatglobalArray32x4 = common global <4 x float>* null, align 8
@floatglobalArray64x1 = common global <1 x double>* null, align 8
@floatglobalArray32x2 = common global <2 x float>* null, align 8

; <2 x i64>, register offset.
define void @fct1_64x2(<2 x i64>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_64x2:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <2 x i64>, <2 x i64>* %array, i64 %offset
  %tmp = load <2 x i64>, <2 x i64>* %arrayidx, align 16
  %tmp1 = load <2 x i64>*, <2 x i64>** @globalArray64x2, align 8
  %arrayidx1 = getelementptr inbounds <2 x i64>, <2 x i64>* %tmp1, i64 %offset
  store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
  ret void
}

; <2 x i64>, immediate offsets.
define void @fct2_64x2(<2 x i64>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_64x2:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
  %arrayidx = getelementptr inbounds <2 x i64>, <2 x i64>* %array, i64 3
  %tmp = load <2 x i64>, <2 x i64>* %arrayidx, align 16
  %tmp1 = load <2 x i64>*, <2 x i64>** @globalArray64x2, align 8
  %arrayidx1 = getelementptr inbounds <2 x i64>, <2 x i64>* %tmp1, i64 5
  store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
  ret void
}

; <4 x i32>, register offset.
define void @fct1_32x4(<4 x i32>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_32x4:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <4 x i32>, <4 x i32>* %array, i64 %offset
  %tmp = load <4 x i32>, <4 x i32>* %arrayidx, align 16
  %tmp1 = load <4 x i32>*, <4 x i32>** @globalArray32x4, align 8
  %arrayidx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %tmp1, i64 %offset
  store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
  ret void
}

; <4 x i32>, immediate offsets.
define void @fct2_32x4(<4 x i32>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_32x4:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
  %arrayidx = getelementptr inbounds <4 x i32>, <4 x i32>* %array, i64 3
  %tmp = load <4 x i32>, <4 x i32>* %arrayidx, align 16
  %tmp1 = load <4 x i32>*, <4 x i32>** @globalArray32x4, align 8
  %arrayidx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %tmp1, i64 5
  store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
  ret void
}

; <8 x i16>, register offset.
define void @fct1_16x8(<8 x i16>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_16x8:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <8 x i16>, <8 x i16>* %array, i64 %offset
  %tmp = load <8 x i16>, <8 x i16>* %arrayidx, align 16
  %tmp1 = load <8 x i16>*, <8 x i16>** @globalArray16x8, align 8
  %arrayidx1 = getelementptr inbounds <8 x i16>, <8 x i16>* %tmp1, i64 %offset
  store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
  ret void
}

; <8 x i16>, immediate offsets.
define void @fct2_16x8(<8 x i16>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_16x8:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
  %arrayidx = getelementptr inbounds <8 x i16>, <8 x i16>* %array, i64 3
  %tmp = load <8 x i16>, <8 x i16>* %arrayidx, align 16
  %tmp1 = load <8 x i16>*, <8 x i16>** @globalArray16x8, align 8
  %arrayidx1 = getelementptr inbounds <8 x i16>, <8 x i16>* %tmp1, i64 5
  store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
  ret void
}

; <16 x i8>, register offset.
define void @fct1_8x16(<16 x i8>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_8x16:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <16 x i8>, <16 x i8>* %array, i64 %offset
  %tmp = load <16 x i8>, <16 x i8>* %arrayidx, align 16
  %tmp1 = load <16 x i8>*, <16 x i8>** @globalArray8x16, align 8
  %arrayidx1 = getelementptr inbounds <16 x i8>, <16 x i8>* %tmp1, i64 %offset
  store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
  ret void
}

; <16 x i8>, immediate offsets.
define void @fct2_8x16(<16 x i8>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_8x16:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
  %arrayidx = getelementptr inbounds <16 x i8>, <16 x i8>* %array, i64 3
  %tmp = load <16 x i8>, <16 x i8>* %arrayidx, align 16
  %tmp1 = load <16 x i8>*, <16 x i8>** @globalArray8x16, align 8
  %arrayidx1 = getelementptr inbounds <16 x i8>, <16 x i8>* %tmp1, i64 5
  store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
  ret void
}

; <1 x i64>, register offset.
define void @fct1_64x1(<1 x i64>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_64x1:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <1 x i64>, <1 x i64>* %array, i64 %offset
  %tmp = load <1 x i64>, <1 x i64>* %arrayidx, align 8
  %tmp1 = load <1 x i64>*, <1 x i64>** @globalArray64x1, align 8
  %arrayidx1 = getelementptr inbounds <1 x i64>, <1 x i64>* %tmp1, i64 %offset
  store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
  ret void
}

; <1 x i64>, immediate offsets.
define void @fct2_64x1(<1 x i64>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_64x1:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
  %arrayidx = getelementptr inbounds <1 x i64>, <1 x i64>* %array, i64 3
  %tmp = load <1 x i64>, <1 x i64>* %arrayidx, align 8
  %tmp1 = load <1 x i64>*, <1 x i64>** @globalArray64x1, align 8
  %arrayidx1 = getelementptr inbounds <1 x i64>, <1 x i64>* %tmp1, i64 5
  store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
  ret void
}

; <2 x i32>, register offset.
define void @fct1_32x2(<2 x i32>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_32x2:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <2 x i32>, <2 x i32>* %array, i64 %offset
  %tmp = load <2 x i32>, <2 x i32>* %arrayidx, align 8
  %tmp1 = load <2 x i32>*, <2 x i32>** @globalArray32x2, align 8
  %arrayidx1 = getelementptr inbounds <2 x i32>, <2 x i32>* %tmp1, i64 %offset
  store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
  ret void
}

; <2 x i32>, immediate offsets.
define void @fct2_32x2(<2 x i32>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_32x2:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
  %arrayidx = getelementptr inbounds <2 x i32>, <2 x i32>* %array, i64 3
  %tmp = load <2 x i32>, <2 x i32>* %arrayidx, align 8
  %tmp1 = load <2 x i32>*, <2 x i32>** @globalArray32x2, align 8
  %arrayidx1 = getelementptr inbounds <2 x i32>, <2 x i32>* %tmp1, i64 5
  store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
  ret void
}

; <4 x i16>, register offset.
define void @fct1_16x4(<4 x i16>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_16x4:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <4 x i16>, <4 x i16>* %array, i64 %offset
  %tmp = load <4 x i16>, <4 x i16>* %arrayidx, align 8
  %tmp1 = load <4 x i16>*, <4 x i16>** @globalArray16x4, align 8
  %arrayidx1 = getelementptr inbounds <4 x i16>, <4 x i16>* %tmp1, i64 %offset
  store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
  ret void
}

; <4 x i16>, immediate offsets.
define void @fct2_16x4(<4 x i16>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_16x4:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
  %arrayidx = getelementptr inbounds <4 x i16>, <4 x i16>* %array, i64 3
  %tmp = load <4 x i16>, <4 x i16>* %arrayidx, align 8
  %tmp1 = load <4 x i16>*, <4 x i16>** @globalArray16x4, align 8
  %arrayidx1 = getelementptr inbounds <4 x i16>, <4 x i16>* %tmp1, i64 5
  store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
  ret void
}

; <8 x i8>, register offset.
; NOTE(review): unlike the other element types, there is no fct2_8x8
; counterpart in this file as seen here -- confirm whether that is intended.
define void @fct1_8x8(<8 x i8>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_8x8:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <8 x i8>, <8 x i8>* %array, i64 %offset
  %tmp = load <8 x i8>, <8 x i8>* %arrayidx, align 8
  %tmp1 = load <8 x i8>*, <8 x i8>** @globalArray8x8, align 8
  %arrayidx1 = getelementptr inbounds <8 x i8>, <8 x i8>* %tmp1, i64 %offset
  store <8 x i8> %tmp, <8 x i8>* %arrayidx1, align 8
  ret void
}

; Add a bunch of tests for rdar://13258794: Match LDUR/STUR for D and Q
; registers for unscaled vector accesses
;
; fct0-fct7 load a vector from byte offset 3 of %str; the byte offset is not a
; multiple of the access size, and an ldur with immediate #3 is expected
; (D register for 64-bit vectors, Q register for 128-bit vectors).

; <1 x i64> load at byte offset 3.
define <1 x i64> @fct0(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct0:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <1 x i64>*
  %0 = load <1 x i64>, <1 x i64>* %q, align 8
  ret <1 x i64> %0
}

; <2 x i32> load at byte offset 3.
define <2 x i32> @fct1(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct1:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <2 x i32>*
  %0 = load <2 x i32>, <2 x i32>* %q, align 8
  ret <2 x i32> %0
}

; <4 x i16> load at byte offset 3.
define <4 x i16> @fct2(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct2:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <4 x i16>*
  %0 = load <4 x i16>, <4 x i16>* %q, align 8
  ret <4 x i16> %0
}

; <8 x i8> load at byte offset 3.
define <8 x i8> @fct3(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct3:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <8 x i8>*
  %0 = load <8 x i8>, <8 x i8>* %q, align 8
  ret <8 x i8> %0
}

; <2 x i64> load at byte offset 3.
define <2 x i64> @fct4(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct4:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <2 x i64>*
  %0 = load <2 x i64>, <2 x i64>* %q, align 16
  ret <2 x i64> %0
}

; <4 x i32> load at byte offset 3.
define <4 x i32> @fct5(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct5:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <4 x i32>*
  %0 = load <4 x i32>, <4 x i32>* %q, align 16
  ret <4 x i32> %0
}

; <8 x i16> load at byte offset 3.
define <8 x i16> @fct6(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct6:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <8 x i16>*
  %0 = load <8 x i16>, <8 x i16>* %q, align 16
  ret <8 x i16> %0
}

; <16 x i8> load at byte offset 3.
define <16 x i8> @fct7(i8* %str) nounwind readonly ssp {
entry:
; CHECK-LABEL: fct7:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <16 x i8>*
  %0 = load <16 x i8>, <16 x i8>* %q, align 16
  ret <16 x i8> %0
}

; fct8-fct15 load a vector from byte offset 3 of %str and store it back at
; byte offset 4. An ldur/stur pair on the same base register and the same
; vector register is expected.

; <1 x i64> copy from byte offset 3 to byte offset 4.
define void @fct8(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct8:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <1 x i64>*
  %0 = load <1 x i64>, <1 x i64>* %q, align 8
  %p2 = getelementptr inbounds i8, i8* %str, i64 4
  %q2 = bitcast i8* %p2 to <1 x i64>*
  store <1 x i64> %0, <1 x i64>* %q2, align 8
  ret void
}

; <2 x i32> copy from byte offset 3 to byte offset 4.
define void @fct9(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct9:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <2 x i32>*
  %0 = load <2 x i32>, <2 x i32>* %q, align 8
  %p2 = getelementptr inbounds i8, i8* %str, i64 4
  %q2 = bitcast i8* %p2 to <2 x i32>*
  store <2 x i32> %0, <2 x i32>* %q2, align 8
  ret void
}

; <4 x i16> copy from byte offset 3 to byte offset 4.
define void @fct10(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct10:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <4 x i16>*
  %0 = load <4 x i16>, <4 x i16>* %q, align 8
  %p2 = getelementptr inbounds i8, i8* %str, i64 4
  %q2 = bitcast i8* %p2 to <4 x i16>*
  store <4 x i16> %0, <4 x i16>* %q2, align 8
  ret void
}

; <8 x i8> copy from byte offset 3 to byte offset 4.
define void @fct11(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct11:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <8 x i8>*
  %0 = load <8 x i8>, <8 x i8>* %q, align 8
  %p2 = getelementptr inbounds i8, i8* %str, i64 4
  %q2 = bitcast i8* %p2 to <8 x i8>*
  store <8 x i8> %0, <8 x i8>* %q2, align 8
  ret void
}

; <2 x i64> copy from byte offset 3 to byte offset 4.
define void @fct12(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct12:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <2 x i64>*
  %0 = load <2 x i64>, <2 x i64>* %q, align 16
  %p2 = getelementptr inbounds i8, i8* %str, i64 4
  %q2 = bitcast i8* %p2 to <2 x i64>*
  store <2 x i64> %0, <2 x i64>* %q2, align 16
  ret void
}

; <4 x i32> copy from byte offset 3 to byte offset 4.
define void @fct13(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct13:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <4 x i32>*
  %0 = load <4 x i32>, <4 x i32>* %q, align 16
  %p2 = getelementptr inbounds i8, i8* %str, i64 4
  %q2 = bitcast i8* %p2 to <4 x i32>*
  store <4 x i32> %0, <4 x i32>* %q2, align 16
  ret void
}

; <8 x i16> copy from byte offset 3 to byte offset 4.
define void @fct14(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct14:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <8 x i16>*
  %0 = load <8 x i16>, <8 x i16>* %q, align 16
  %p2 = getelementptr inbounds i8, i8* %str, i64 4
  %q2 = bitcast i8* %p2 to <8 x i16>*
  store <8 x i16> %0, <8 x i16>* %q2, align 16
  ret void
}

; <16 x i8> copy from byte offset 3 to byte offset 4.
define void @fct15(i8* %str) nounwind ssp {
entry:
; CHECK-LABEL: fct15:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %p = getelementptr inbounds i8, i8* %str, i64 3
  %q = bitcast i8* %p to <16 x i8>*
  %0 = load <16 x i8>, <16 x i8>* %q, align 16
  %p2 = getelementptr inbounds i8, i8* %str, i64 4
  %q2 = bitcast i8* %p2 to <16 x i8>*
  store <16 x i8> %0, <16 x i8>* %q2, align 16
  ret void
}

; Check the building of vector from a single loaded value.
; Part of <rdar://problem/14170854>
;
; Single loads with immediate offset.
;
; fct16-fct21 load the scalar at element index 1 of %sp0, insert it into lane
; 0 of an undef vector and multiply that vector by itself. The scalar is
; expected to be loaded straight into a b/h/s register with a scaled
; immediate offset, immediately followed by the vector multiply on the same
; register. fct22 and fct23 cover i64, where only the load is matched.

; i8 scalar into <8 x i8>.
define <8 x i8> @fct16(i8* nocapture %sp0) {
; CHECK-LABEL: fct16:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i8, i8* %sp0, i64 1
  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
  %vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <8 x i8> %vec, %vec
  ret <8 x i8> %vmull.i
}

; i8 scalar into <16 x i8>.
define <16 x i8> @fct17(i8* nocapture %sp0) {
; CHECK-LABEL: fct17:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i8, i8* %sp0, i64 1
  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
  %vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <16 x i8> %vec, %vec
  ret <16 x i8> %vmull.i
}

; i16 scalar into <4 x i16>.
define <4 x i16> @fct18(i16* nocapture %sp0) {
; CHECK-LABEL: fct18:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i16, i16* %sp0, i64 1
  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
  %vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <4 x i16> %vec, %vec
  ret <4 x i16> %vmull.i
}

; i16 scalar into <8 x i16>.
define <8 x i16> @fct19(i16* nocapture %sp0) {
; CHECK-LABEL: fct19:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i16, i16* %sp0, i64 1
  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
  %vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <8 x i16> %vec, %vec
  ret <8 x i16> %vmull.i
}

; i32 scalar into <2 x i32>.
define <2 x i32> @fct20(i32* nocapture %sp0) {
; CHECK-LABEL: fct20:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i32, i32* %sp0, i64 1
  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
  %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <2 x i32> %vec, %vec
  ret <2 x i32> %vmull.i
}

; i32 scalar into <4 x i32>.
define <4 x i32> @fct21(i32* nocapture %sp0) {
; CHECK-LABEL: fct21:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i32, i32* %sp0, i64 1
  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
  %vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <4 x i32> %vec, %vec
  ret <4 x i32> %vmull.i
}

; i64 scalar into <1 x i64>: the value is expected to be loaded directly into
; d0, the return register.
define <1 x i64> @fct22(i64* nocapture %sp0) {
; CHECK-LABEL: fct22:
; CHECK: ldr d0, [x0, #8]
entry:
  %addr = getelementptr i64, i64* %sp0, i64 1
  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
  %vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
  ret <1 x i64> %vec
}

; i64 scalar into lane 0 of <2 x i64>: a D-register load is expected; the
; register number is captured but not constrained further.
define <2 x i64> @fct23(i64* nocapture %sp0) {
; CHECK-LABEL: fct23:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
entry:
  %addr = getelementptr i64, i64* %sp0, i64 1
  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
  %vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
  ret <2 x i64> %vec
}

;
; Single loads with register offset.
; fct24-fct31: the scalar at element index %offset of %sp0 is loaded and
; inserted into lane 0 of an undef vector. For i8/i16/i32 the vector is then
; multiplied by itself; for i64 the vector is returned as is. The load is
; expected to use the register-offset form [x0, x1], with the index shifted by
; the element size (lsl #1 / #2 / #3) for elements wider than one byte.

; i8 element, <8 x i8> result: plain register offset, no shift.
define <8 x i8> @fct24(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct24:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.ptr = getelementptr i8, i8* %sp0, i64 %offset
  %elt = load i8, i8* %elt.ptr, align 1
  %lane0 = insertelement <8 x i8> undef, i8 %elt, i32 0
  %squared = mul <8 x i8> %lane0, %lane0
  ret <8 x i8> %squared
}

; i8 element, <16 x i8> result: plain register offset, no shift.
define <16 x i8> @fct25(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct25:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.ptr = getelementptr i8, i8* %sp0, i64 %offset
  %elt = load i8, i8* %elt.ptr, align 1
  %lane0 = insertelement <16 x i8> undef, i8 %elt, i32 0
  %squared = mul <16 x i8> %lane0, %lane0
  ret <16 x i8> %squared
}

; i16 element, <4 x i16> result: index shifted left by 1.
define <4 x i16> @fct26(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct26:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.ptr = getelementptr i16, i16* %sp0, i64 %offset
  %elt = load i16, i16* %elt.ptr, align 1
  %lane0 = insertelement <4 x i16> undef, i16 %elt, i32 0
  %squared = mul <4 x i16> %lane0, %lane0
  ret <4 x i16> %squared
}

; i16 element, <8 x i16> result: index shifted left by 1.
define <8 x i16> @fct27(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct27:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.ptr = getelementptr i16, i16* %sp0, i64 %offset
  %elt = load i16, i16* %elt.ptr, align 1
  %lane0 = insertelement <8 x i16> undef, i16 %elt, i32 0
  %squared = mul <8 x i16> %lane0, %lane0
  ret <8 x i16> %squared
}

; i32 element, <2 x i32> result: index shifted left by 2.
define <2 x i32> @fct28(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct28:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.ptr = getelementptr i32, i32* %sp0, i64 %offset
  %elt = load i32, i32* %elt.ptr, align 1
  %lane0 = insertelement <2 x i32> undef, i32 %elt, i32 0
  %squared = mul <2 x i32> %lane0, %lane0
  ret <2 x i32> %squared
}

; i32 element, <4 x i32> result: index shifted left by 2.
define <4 x i32> @fct29(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct29:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.ptr = getelementptr i32, i32* %sp0, i64 %offset
  %elt = load i32, i32* %elt.ptr, align 1
  %lane0 = insertelement <4 x i32> undef, i32 %elt, i32 0
  %squared = mul <4 x i32> %lane0, %lane0
  ret <4 x i32> %squared
}

; i64 element, <1 x i64> result: loaded directly into d0, index shifted left
; by 3; no multiply in this variant.
define <1 x i64> @fct30(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct30:
; CHECK: ldr d0, [x0, x1, lsl #3]
entry:
  %elt.ptr = getelementptr i64, i64* %sp0, i64 %offset
  %elt = load i64, i64* %elt.ptr, align 1
  %lane0 = insertelement <1 x i64> undef, i64 %elt, i32 0
  ret <1 x i64> %lane0
}

; i64 element into lane 0 of <2 x i64>: loaded directly into d0, index shifted
; left by 3; no multiply in this variant.
define <2 x i64> @fct31(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct31:
; CHECK: ldr d0, [x0, x1, lsl #3]
entry:
  %elt.ptr = getelementptr i64, i64* %sp0, i64 %offset
  %elt = load i64, i64* %elt.ptr, align 1
  %lane0 = insertelement <2 x i64> undef, i64 %elt, i32 0
  ret <2 x i64> %lane0
}