; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s

; Vector load/store addressing-mode selection tests.
; Each function below is compiled by llc and the emitted assembly is matched
; against the FileCheck patterns written next to it.

; rdar://9428579

%type1 = type { <16 x i8> }
%type2 = type { <8 x i8> }
; NOTE(review): %type3 is not referenced by any test visible here.
%type3 = type { <4 x i16> }


; Stores a zero <16 x i8> through a pointer loaded from %argtable.
; Expects the pointer load followed by a Q-register str to that pointer.
define hidden fastcc void @t1(%type1** %argtable) nounwind {
entry:
; CHECK-LABEL: t1:
; CHECK: ldr x[[REG:[0-9]+]], [x0]
; CHECK: str q0, [x[[REG]]]
  %tmp1 = load %type1*, %type1** %argtable, align 8
  %tmp2 = getelementptr inbounds %type1, %type1* %tmp1, i64 0, i32 0
  store <16 x i8> zeroinitializer, <16 x i8>* %tmp2, align 16
  ret void
}

; Same as @t1 for a 64-bit vector: expects a D-register str.
define hidden fastcc void @t2(%type2** %argtable) nounwind {
entry:
; CHECK-LABEL: t2:
; CHECK: ldr x[[REG:[0-9]+]], [x0]
; CHECK: str d0, [x[[REG]]]
  %tmp1 = load %type2*, %type2** %argtable, align 8
  %tmp2 = getelementptr inbounds %type2, %type2* %tmp1, i64 0, i32 0
  store <8 x i8> zeroinitializer, <8 x i8>* %tmp2, align 8
  ret void
}

; Add a bunch of tests for rdar://11246289
;
; Naming scheme of the tests in this group:
;   fct1_<elt>x<n>: copy %array[%offset] to <global>[%offset]; the index is a
;                   register, so a shifted register offset is expected for
;                   both the load and the store.
;   fct2_<elt>x<n>: copy %array[3] to <global>[5]; the indices are constants,
;                   so immediate byte offsets are expected.

@globalArray64x2 = common global <2 x i64>* null, align 8
@globalArray32x4 = common global <4 x i32>* null, align 8
@globalArray16x8 = common global <8 x i16>* null, align 8
@globalArray8x16 = common global <16 x i8>* null, align 8
@globalArray64x1 = common global <1 x i64>* null, align 8
@globalArray32x2 = common global <2 x i32>* null, align 8
@globalArray16x4 = common global <4 x i16>* null, align 8
@globalArray8x8 = common global <8 x i8>* null, align 8
; NOTE(review): the floating-point globals below are not referenced by any
; test visible here.
@floatglobalArray64x2 = common global <2 x double>* null, align 8
@floatglobalArray32x4 = common global <4 x float>* null, align 8
@floatglobalArray64x1 = common global <1 x double>* null, align 8
@floatglobalArray32x2 = common global <2 x float>* null, align 8

; 128-bit vectors: the index is expected to be shifted left by 4.
; The load pattern matches the closing bracket of the address operand
; explicitly, like the other register-offset tests in this group.
define void @fct1_64x2(<2 x i64>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_64x2:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <2 x i64>, <2 x i64>* %array, i64 %offset
  %tmp = load <2 x i64>, <2 x i64>* %arrayidx, align 16
  %tmp1 = load <2 x i64>*, <2 x i64>** @globalArray64x2, align 8
  %arrayidx1 = getelementptr inbounds <2 x i64>, <2 x i64>* %tmp1, i64 %offset
  store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
  ret void
}

; 128-bit vectors, constant indices 3 and 5: expects offsets #48 and #80.
define void @fct2_64x2(<2 x i64>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_64x2:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
  %arrayidx = getelementptr inbounds <2 x i64>, <2 x i64>* %array, i64 3
  %tmp = load <2 x i64>, <2 x i64>* %arrayidx, align 16
  %tmp1 = load <2 x i64>*, <2 x i64>** @globalArray64x2, align 8
  %arrayidx1 = getelementptr inbounds <2 x i64>, <2 x i64>* %tmp1, i64 5
  store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
  ret void
}

define void @fct1_32x4(<4 x i32>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_32x4:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <4 x i32>, <4 x i32>* %array, i64 %offset
  %tmp = load <4 x i32>, <4 x i32>* %arrayidx, align 16
  %tmp1 = load <4 x i32>*, <4 x i32>** @globalArray32x4, align 8
  %arrayidx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %tmp1, i64 %offset
  store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
  ret void
}

define void @fct2_32x4(<4 x i32>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_32x4:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
  %arrayidx = getelementptr inbounds <4 x i32>, <4 x i32>* %array, i64 3
  %tmp = load <4 x i32>, <4 x i32>* %arrayidx, align 16
  %tmp1 = load <4 x i32>*, <4 x i32>** @globalArray32x4, align 8
  %arrayidx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %tmp1, i64 5
  store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
  ret void
}

define void @fct1_16x8(<8 x i16>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_16x8:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <8 x i16>, <8 x i16>* %array, i64 %offset
  %tmp = load <8 x i16>, <8 x i16>* %arrayidx, align 16
  %tmp1 = load <8 x i16>*, <8 x i16>** @globalArray16x8, align 8
  %arrayidx1 = getelementptr inbounds <8 x i16>, <8 x i16>* %tmp1, i64 %offset
  store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
  ret void
}

define void @fct2_16x8(<8 x i16>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_16x8:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
  %arrayidx = getelementptr inbounds <8 x i16>, <8 x i16>* %array, i64 3
  %tmp = load <8 x i16>, <8 x i16>* %arrayidx, align 16
  %tmp1 = load <8 x i16>*, <8 x i16>** @globalArray16x8, align 8
  %arrayidx1 = getelementptr inbounds <8 x i16>, <8 x i16>* %tmp1, i64 5
  store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
  ret void
}

define void @fct1_8x16(<16 x i8>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_8x16:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <16 x i8>, <16 x i8>* %array, i64 %offset
  %tmp = load <16 x i8>, <16 x i8>* %arrayidx, align 16
  %tmp1 = load <16 x i8>*, <16 x i8>** @globalArray8x16, align 8
  %arrayidx1 = getelementptr inbounds <16 x i8>, <16 x i8>* %tmp1, i64 %offset
  store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
  ret void
}

define void @fct2_8x16(<16 x i8>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_8x16:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
  %arrayidx = getelementptr inbounds <16 x i8>, <16 x i8>* %array, i64 3
  %tmp = load <16 x i8>, <16 x i8>* %arrayidx, align 16
  %tmp1 = load <16 x i8>*, <16 x i8>** @globalArray8x16, align 8
  %arrayidx1 = getelementptr inbounds <16 x i8>, <16 x i8>* %tmp1, i64 5
  store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
  ret void
}

; 64-bit vectors: the index is expected to be shifted left by 3 and the data
; to travel through a D register.
define void @fct1_64x1(<1 x i64>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_64x1:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <1 x i64>, <1 x i64>* %array, i64 %offset
  %tmp = load <1 x i64>, <1 x i64>* %arrayidx, align 8
  %tmp1 = load <1 x i64>*, <1 x i64>** @globalArray64x1, align 8
  %arrayidx1 = getelementptr inbounds <1 x i64>, <1 x i64>* %tmp1, i64 %offset
  store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
  ret void
}

; 64-bit vectors, constant indices 3 and 5: expects offsets #24 and #40.
define void @fct2_64x1(<1 x i64>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_64x1:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
  %arrayidx = getelementptr inbounds <1 x i64>, <1 x i64>* %array, i64 3
  %tmp = load <1 x i64>, <1 x i64>* %arrayidx, align 8
  %tmp1 = load <1 x i64>*, <1 x i64>** @globalArray64x1, align 8
  %arrayidx1 = getelementptr inbounds <1 x i64>, <1 x i64>* %tmp1, i64 5
  store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
  ret void
}

define void @fct1_32x2(<2 x i32>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_32x2:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <2 x i32>, <2 x i32>* %array, i64 %offset
  %tmp = load <2 x i32>, <2 x i32>* %arrayidx, align 8
  %tmp1 = load <2 x i32>*, <2 x i32>** @globalArray32x2, align 8
  %arrayidx1 = getelementptr inbounds <2 x i32>, <2 x i32>* %tmp1, i64 %offset
  store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
  ret void
}

define void @fct2_32x2(<2 x i32>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_32x2:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
  %arrayidx = getelementptr inbounds <2 x i32>, <2 x i32>* %array, i64 3
  %tmp = load <2 x i32>, <2 x i32>* %arrayidx, align 8
  %tmp1 = load <2 x i32>*, <2 x i32>** @globalArray32x2, align 8
  %arrayidx1 = getelementptr inbounds <2 x i32>, <2 x i32>* %tmp1, i64 5
  store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
  ret void
}

define void @fct1_16x4(<4 x i16>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_16x4:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <4 x i16>, <4 x i16>* %array, i64 %offset
  %tmp = load <4 x i16>, <4 x i16>* %arrayidx, align 8
  %tmp1 = load <4 x i16>*, <4 x i16>** @globalArray16x4, align 8
  %arrayidx1 = getelementptr inbounds <4 x i16>, <4 x i16>* %tmp1, i64 %offset
  store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
  ret void
}

define void @fct2_16x4(<4 x i16>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_16x4:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
  %arrayidx = getelementptr inbounds <4 x i16>, <4 x i16>* %array, i64 3
  %tmp = load <4 x i16>, <4 x i16>* %arrayidx, align 8
  %tmp1 = load <4 x i16>*, <4 x i16>** @globalArray16x4, align 8
  %arrayidx1 = getelementptr inbounds <4 x i16>, <4 x i16>* %tmp1, i64 5
  store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
  ret void
}

define void @fct1_8x8(<8 x i8>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_8x8:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <8 x i8>, <8 x i8>* %array, i64 %offset
  %tmp = load <8 x i8>, <8 x i8>* %arrayidx, align 8
  %tmp1 = load <8 x i8>*, <8 x i8>** @globalArray8x8, align 8
  %arrayidx1 = getelementptr inbounds <8 x i8>, <8 x i8>* %tmp1, i64 %offset
  store <8 x i8> %tmp, <8 x i8>* %arrayidx1, align 8
  ret void
}

; Add a bunch of tests for rdar://13258794: Match LDUR/STUR for D and Q
; registers for unscaled vector accesses.
;
; @fct0 to @fct7 load a vector from byte offset 3 of @str and return it; each
; expects an ldur with immediate #3 into a D register (64-bit vectors) or a
; Q register (128-bit vectors).
@str = global [63 x i8] c"Test case for rdar://13258794: LDUR/STUR for D and Q registers\00", align 1

define <1 x i64> @fct0() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct0:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
  ret <1 x i64> %0
}

define <2 x i32> @fct1() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct1:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
  ret <2 x i32> %0
}

define <4 x i16> @fct2() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct2:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
  ret <4 x i16> %0
}

define <8 x i8> @fct3() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct3:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
  ret <8 x i8> %0
}

define <2 x i64> @fct4() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct4:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
  ret <2 x i64> %0
}

define <4 x i32> @fct5() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct5:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
  ret <4 x i32> %0
}

define <8 x i16> @fct6() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct6:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
  ret <8 x i16> %0
}

define <16 x i8> @fct7() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct7:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
  ret <16 x i8> %0
}

; @fct8 to @fct15 load a vector from byte offset 3 of @str and store it back
; at byte offset 4; each expects an ldur/stur pair that shares one base
; register and uses immediates #3 and #4.
define void @fct8() nounwind ssp {
entry:
; CHECK-LABEL: fct8:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
  store <1 x i64> %0, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <1 x i64>*), align 8
  ret void
}

define void @fct9() nounwind ssp {
entry:
; CHECK-LABEL: fct9:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
  store <2 x i32> %0, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <2 x i32>*), align 8
  ret void
}

define void @fct10() nounwind ssp {
entry:
; CHECK-LABEL: fct10:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
  store <4 x i16> %0, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <4 x i16>*), align 8
  ret void
}

define void @fct11() nounwind ssp {
entry:
; CHECK-LABEL: fct11:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
  store <8 x i8> %0, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <8 x i8>*), align 8
  ret void
}

define void @fct12() nounwind ssp {
entry:
; CHECK-LABEL: fct12:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
  store <2 x i64> %0, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <2 x i64>*), align 16
  ret void
}

define void @fct13() nounwind ssp {
entry:
; CHECK-LABEL: fct13:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
  store <4 x i32> %0, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <4 x i32>*), align 16
  ret void
}

define void @fct14() nounwind ssp {
entry:
; CHECK-LABEL: fct14:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
  store <8 x i16> %0, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <8 x i16>*), align 16
  ret void
}

define void @fct15() nounwind ssp {
entry:
; CHECK-LABEL: fct15:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
  store <16 x i8> %0, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <16 x i8>*), align 16
  ret void
}

; Check the building of vector from a single loaded value.
; Part of <rdar://problem/14170854>
;
; Single loads with immediate offset.
;
; @fct16 to @fct21 load the scalar at element index 1 of %sp0, insert it into
; lane 0 of an otherwise undef vector and multiply the vector by itself. The
; scalar load is expected to target a b/h/s register directly, with the
; vector mul as the very next instruction.
define <8 x i8> @fct16(i8* nocapture %sp0) {
; CHECK-LABEL: fct16:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i8, i8* %sp0, i64 1
  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
  %vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <8 x i8> %vec, %vec
  ret <8 x i8> %vmull.i
}

define <16 x i8> @fct17(i8* nocapture %sp0) {
; CHECK-LABEL: fct17:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i8, i8* %sp0, i64 1
  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
  %vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <16 x i8> %vec, %vec
  ret <16 x i8> %vmull.i
}

define <4 x i16> @fct18(i16* nocapture %sp0) {
; CHECK-LABEL: fct18:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i16, i16* %sp0, i64 1
  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
  %vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <4 x i16> %vec, %vec
  ret <4 x i16> %vmull.i
}

define <8 x i16> @fct19(i16* nocapture %sp0) {
; CHECK-LABEL: fct19:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i16, i16* %sp0, i64 1
  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
  %vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <8 x i16> %vec, %vec
  ret <8 x i16> %vmull.i
}

define <2 x i32> @fct20(i32* nocapture %sp0) {
; CHECK-LABEL: fct20:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i32, i32* %sp0, i64 1
  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
  %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <2 x i32> %vec, %vec
  ret <2 x i32> %vmull.i
}

define <4 x i32> @fct21(i32* nocapture %sp0) {
; CHECK-LABEL: fct21:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i32, i32* %sp0, i64 1
  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
  %vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <4 x i32> %vec, %vec
  ret <4 x i32> %vmull.i
}

; The i64 variants return the built vector without a multiply; only the
; D-register load at byte offset #8 is matched.
define <1 x i64> @fct22(i64* nocapture %sp0) {
; CHECK-LABEL: fct22:
; CHECK: ldr d0, [x0, #8]
entry:
  %addr = getelementptr i64, i64* %sp0, i64 1
  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
  %vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
  ret <1 x i64> %vec
}

define <2 x i64> @fct23(i64* nocapture %sp0) {
; CHECK-LABEL: fct23:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
entry:
  %addr = getelementptr i64, i64* %sp0, i64 1
  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
  %vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
  ret <2 x i64> %vec
}

;
; Single loads with register offset.
; Register-offset variants of the single-scalar-load tests: the scalar at
; %sp0[%offset] is loaded, placed in lane 0 of an otherwise undef vector and,
; for the i8/i16/i32 cases, the vector is multiplied by itself. The load is
; expected to use [x0, x1] addressing, scaled by the element size where the
; element is wider than one byte.

; i8 element into <8 x i8>: unscaled register offset, then an 8b multiply.
define <8 x i8> @fct24(i8* nocapture %sp0, i64 %offset) {
entry:
; CHECK-LABEL: fct24:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
  %elt.addr = getelementptr i8, i8* %sp0, i64 %offset
  %scalar = load i8, i8* %elt.addr, align 1
  %lane0 = insertelement <8 x i8> undef, i8 %scalar, i32 0
  %squared = mul <8 x i8> %lane0, %lane0
  ret <8 x i8> %squared
}

; i8 element into <16 x i8>: unscaled register offset, then a 16b multiply.
define <16 x i8> @fct25(i8* nocapture %sp0, i64 %offset) {
entry:
; CHECK-LABEL: fct25:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
  %elt.addr = getelementptr i8, i8* %sp0, i64 %offset
  %scalar = load i8, i8* %elt.addr, align 1
  %lane0 = insertelement <16 x i8> undef, i8 %scalar, i32 0
  %squared = mul <16 x i8> %lane0, %lane0
  ret <16 x i8> %squared
}

; i16 element into <4 x i16>: register offset scaled by lsl #1.
define <4 x i16> @fct26(i16* nocapture %sp0, i64 %offset) {
entry:
; CHECK-LABEL: fct26:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
  %elt.addr = getelementptr i16, i16* %sp0, i64 %offset
  %scalar = load i16, i16* %elt.addr, align 1
  %lane0 = insertelement <4 x i16> undef, i16 %scalar, i32 0
  %squared = mul <4 x i16> %lane0, %lane0
  ret <4 x i16> %squared
}

; i16 element into <8 x i16>: register offset scaled by lsl #1.
define <8 x i16> @fct27(i16* nocapture %sp0, i64 %offset) {
entry:
; CHECK-LABEL: fct27:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
  %elt.addr = getelementptr i16, i16* %sp0, i64 %offset
  %scalar = load i16, i16* %elt.addr, align 1
  %lane0 = insertelement <8 x i16> undef, i16 %scalar, i32 0
  %squared = mul <8 x i16> %lane0, %lane0
  ret <8 x i16> %squared
}

; i32 element into <2 x i32>: register offset scaled by lsl #2.
define <2 x i32> @fct28(i32* nocapture %sp0, i64 %offset) {
entry:
; CHECK-LABEL: fct28:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
  %elt.addr = getelementptr i32, i32* %sp0, i64 %offset
  %scalar = load i32, i32* %elt.addr, align 1
  %lane0 = insertelement <2 x i32> undef, i32 %scalar, i32 0
  %squared = mul <2 x i32> %lane0, %lane0
  ret <2 x i32> %squared
}

; i32 element into <4 x i32>: register offset scaled by lsl #2.
define <4 x i32> @fct29(i32* nocapture %sp0, i64 %offset) {
entry:
; CHECK-LABEL: fct29:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
  %elt.addr = getelementptr i32, i32* %sp0, i64 %offset
  %scalar = load i32, i32* %elt.addr, align 1
  %lane0 = insertelement <4 x i32> undef, i32 %scalar, i32 0
  %squared = mul <4 x i32> %lane0, %lane0
  ret <4 x i32> %squared
}

; i64 element into <1 x i64>: no multiply, only the d0 load scaled by lsl #3
; is matched.
define <1 x i64> @fct30(i64* nocapture %sp0, i64 %offset) {
entry:
; CHECK-LABEL: fct30:
; CHECK: ldr d0, [x0, x1, lsl #3]
  %elt.addr = getelementptr i64, i64* %sp0, i64 %offset
  %scalar = load i64, i64* %elt.addr, align 1
  %lane0 = insertelement <1 x i64> undef, i64 %scalar, i32 0
  ret <1 x i64> %lane0
}

; i64 element into lane 0 of <2 x i64>: no multiply, only the d0 load scaled
; by lsl #3 is matched.
define <2 x i64> @fct31(i64* nocapture %sp0, i64 %offset) {
entry:
; CHECK-LABEL: fct31:
; CHECK: ldr d0, [x0, x1, lsl #3]
  %elt.addr = getelementptr i64, i64* %sp0, i64 %offset
  %scalar = load i64, i64* %elt.addr, align 1
  %lane0 = insertelement <2 x i64> undef, i64 %scalar, i32 0
  ret <2 x i64> %lane0
}