; RUN: llc < %s -mtriple=arm64-eabi -verify-machineinstrs | FileCheck %s

; Each function here loads two adjacent elements from %p and combines them;
; the expected output is a single paired load from [x0].
;
; The instruction patterns spell out the operands, not just the mnemonic,
; because the symbol names themselves contain the text "ldp" and a
; mnemonic-only pattern could be satisfied by the symbol name alone.

; Two adjacent i32 loads, 32-bit add.
; CHECK-LABEL: ldp_int:
; CHECK: ldp w{{[0-9]+}}, w{{[0-9]+}}, [x0]
define i32 @ldp_int(i32* %p) nounwind {
  %tmp = load i32, i32* %p, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  %tmp1 = load i32, i32* %add.ptr, align 4
  %add = add nsw i32 %tmp1, %tmp
  ret i32 %add
}

; Two adjacent i32 loads, both sign-extended to i64: expect the
; sign-extending pair load.
; CHECK-LABEL: ldp_sext_int:
; CHECK: ldpsw x{{[0-9]+}}, x{{[0-9]+}}, [x0]
define i64 @ldp_sext_int(i32* %p) nounwind {
  %tmp = load i32, i32* %p, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  %tmp1 = load i32, i32* %add.ptr, align 4
  %sexttmp = sext i32 %tmp to i64
  %sexttmp1 = sext i32 %tmp1 to i64
  %add = add nsw i64 %sexttmp1, %sexttmp
  ret i64 %add
}

; Only the first loaded value (element 0) is sign-extended; the second is
; zero-extended. Expect a plain 32-bit pair load followed by an explicit
; sign extension of the first destination register.
; CHECK-LABEL: ldp_half_sext_res0_int:
; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0]
; CHECK: sxtw x[[DST1]], w[[DST1]]
define i64 @ldp_half_sext_res0_int(i32* %p) nounwind {
  %tmp = load i32, i32* %p, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  %tmp1 = load i32, i32* %add.ptr, align 4
  %sexttmp = sext i32 %tmp to i64
  %sexttmp1 = zext i32 %tmp1 to i64
  %add = add nsw i64 %sexttmp1, %sexttmp
  ret i64 %add
}

; Mirror of the previous test: only the second loaded value (element 1) is
; sign-extended, so the explicit extension applies to the second destination
; register.
; CHECK-LABEL: ldp_half_sext_res1_int:
; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0]
; CHECK: sxtw x[[DST2]], w[[DST2]]
define i64 @ldp_half_sext_res1_int(i32* %p) nounwind {
  %tmp = load i32, i32* %p, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  %tmp1 = load i32, i32* %add.ptr, align 4
  %sexttmp = zext i32 %tmp to i64
  %sexttmp1 = sext i32 %tmp1 to i64
  %add = add nsw i64 %sexttmp1, %sexttmp
  ret i64 %add
}


; Two adjacent i64 loads.
; CHECK-LABEL: ldp_long:
; CHECK: ldp x{{[0-9]+}}, x{{[0-9]+}}, [x0]
define i64 @ldp_long(i64* %p) nounwind {
  %tmp = load i64, i64* %p, align 8
  %add.ptr = getelementptr inbounds i64, i64* %p, i64 1
  %tmp1 = load i64, i64* %add.ptr, align 8
  %add = add nsw i64 %tmp1, %tmp
  ret i64 %add
}

; Two adjacent float loads.
; CHECK-LABEL: ldp_float:
; CHECK: ldp s{{[0-9]+}}, s{{[0-9]+}}, [x0]
define float @ldp_float(float* %p) nounwind {
  %tmp = load float, float* %p, align 4
  %add.ptr = getelementptr inbounds float, float* %p, i64 1
  %tmp1 = load float, float* %add.ptr, align 4
  %add = fadd float %tmp, %tmp1
  ret float %add
}

; Two adjacent double loads. The pattern includes the operands because the
; symbol name itself contains the text "ldp".
; CHECK-LABEL: ldp_double:
; CHECK: ldp d{{[0-9]+}}, d{{[0-9]+}}, [x0]
define double @ldp_double(double* %p) nounwind {
  %tmp = load double, double* %p, align 8
  %add.ptr = getelementptr inbounds double, double* %p, i64 1
  %tmp1 = load double, double* %add.ptr, align 8
  %add = fadd double %tmp, %tmp1
  ret double %add
}

; Two adjacent 128-bit vector loads. As above, the operands are matched so
; that the symbol name cannot satisfy the pattern.
; CHECK-LABEL: ldp_doublex2:
; CHECK: ldp q{{[0-9]+}}, q{{[0-9]+}}, [x0]
define <2 x double> @ldp_doublex2(<2 x double>* %p) nounwind {
  %tmp = load <2 x double>, <2 x double>* %p, align 16
  %add.ptr = getelementptr inbounds <2 x double>, <2 x double>* %p, i64 1
  %tmp1 = load <2 x double>, <2 x double>* %add.ptr, align 16
  %add = fadd <2 x double> %tmp, %tmp1
  ret <2 x double> %add
}

; Test the load/store optimizer---combine ldurs into a ldp, if appropriate.
; Each function below loads elements -1 and -2 relative to %a (negative
; offsets), and the expected pair load is based at the lower of the two
; addresses.
define i32 @ldur_int(i32* %a) nounwind {
; CHECK-LABEL: ldur_int:
; CHECK: ldp [[DST1:w[0-9]+]], [[DST2:w[0-9]+]], [x0, #-8]
; CHECK-NEXT: add w{{[0-9]+}}, [[DST2]], [[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i32 -1
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i32 -2
  %tmp2 = load i32, i32* %p2, align 2
  %tmp3 = add i32 %tmp1, %tmp2
  ret i32 %tmp3
}

; Both values sign-extended: expect the sign-extending pair load.
define i64 @ldur_sext_int(i32* %a) nounwind {
; CHECK-LABEL: ldur_sext_int:
; CHECK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-8]
; CHECK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i32 -1
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i32 -2
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

; Only the value at the lower address (%tmp2, element -2) is sign-extended.
; It is expected in the first destination register of the pair, which is
; then extended explicitly.
define i64 @ldur_half_sext_int_res0(i32* %a) nounwind {
; CHECK-LABEL: ldur_half_sext_int_res0:
; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
; CHECK: sxtw x[[DST1]], w[[DST1]]
; CHECK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i32 -1
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i32 -2
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = zext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

; Only the value at the higher address (%tmp1, element -1) is sign-extended.
; It is expected in the second destination register of the pair.
define i64 @ldur_half_sext_int_res1(i32* %a) nounwind {
; CHECK-LABEL: ldur_half_sext_int_res1:
; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
; CHECK: sxtw x[[DST2]], w[[DST2]]
; CHECK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i32 -1
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i32 -2
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = zext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}


; i64 elements -1 and -2: byte offsets -8 and -16.
define i64 @ldur_long(i64* %a) nounwind ssp {
; CHECK-LABEL: ldur_long:
; CHECK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-16]
; CHECK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i64, i64* %a, i64 -1
  %tmp1 = load i64, i64* %p1, align 2
  %p2 = getelementptr inbounds i64, i64* %a, i64 -2
  %tmp2 = load i64, i64* %p2, align 2
  %tmp3 = add i64 %tmp1, %tmp2
  ret i64 %tmp3
}

; float elements -1 and -2: byte offsets -4 and -8.
define float @ldur_float(float* %a) {
; CHECK-LABEL: ldur_float:
; CHECK: ldp [[DST1:s[0-9]+]], [[DST2:s[0-9]+]], [x0, #-8]
; CHECK-NEXT: fadd s{{[0-9]+}}, [[DST2]], [[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds float, float* %a, i64 -1
  %tmp1 = load float, float* %p1, align 2
  %p2 = getelementptr inbounds float, float* %a, i64 -2
  %tmp2 = load float, float* %p2, align 2
  %tmp3 = fadd float %tmp1, %tmp2
  ret float %tmp3
}

; double elements -1 and -2: byte offsets -8 and -16.
define double @ldur_double(double* %a) {
; CHECK-LABEL: ldur_double:
; CHECK: ldp [[DST1:d[0-9]+]], [[DST2:d[0-9]+]], [x0, #-16]
; CHECK-NEXT: fadd d{{[0-9]+}}, [[DST2]], [[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds double, double* %a, i64 -1
  %tmp1 = load double, double* %p1, align 2
  %p2 = getelementptr inbounds double, double* %a, i64 -2
  %tmp2 = load double, double* %p2, align 2
  %tmp3 = fadd double %tmp1, %tmp2
  ret double %tmp3
}

; <2 x double> elements -1 and -2: byte offsets -16 and -32.
define <2 x double> @ldur_doublex2(<2 x double>* %a) {
; CHECK-LABEL: ldur_doublex2:
; CHECK: ldp q[[DST1:[0-9]+]], q[[DST2:[0-9]+]], [x0, #-32]
; CHECK-NEXT: fadd v{{[0-9]+}}.2d, v[[DST2]].2d, v[[DST1]].2d
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds <2 x double>, <2 x double>* %a, i64 -1
  %tmp1 = load <2 x double>, <2 x double>* %p1, align 2
  %p2 = getelementptr inbounds <2 x double>, <2 x double>* %a, i64 -2
  %tmp2 = load <2 x double>, <2 x double>* %p2, align 2
  %tmp3 = fadd <2 x double> %tmp1, %tmp2
  ret <2 x double> %tmp3
}

; Now check some boundary conditions

; i64 elements -31 and -32: byte offsets -248 and -256. Pairing is still
; expected, based at -256, and no unscaled single load may precede it.
define i64 @pairUpBarelyIn(i64* %a) nounwind ssp {
; CHECK-LABEL: pairUpBarelyIn:
; CHECK-NOT: ldur
; CHECK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
; CHECK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i64, i64* %a, i64 -31
  %tmp1 = load i64, i64* %p1, align 2
  %p2 = getelementptr inbounds i64, i64* %a, i64 -32
  %tmp2 = load i64, i64* %p2, align 2
  %tmp3 = add i64 %tmp1, %tmp2
  ret i64 %tmp3
}

; i32 elements -63 and -64 (byte offsets -252 and -256), both sign-extended.
define i64 @pairUpBarelyInSext(i32* %a) nounwind ssp {
; CHECK-LABEL: pairUpBarelyInSext:
; CHECK-NOT: ldur
; CHECK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
; CHECK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -63
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i64 -64
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

; Same addresses as above, but only the lower-address value (%tmp2) is
; sign-extended: plain pair load plus explicit extension of the first
; destination register.
define i64 @pairUpBarelyInHalfSextRes0(i32* %a) nounwind ssp {
; CHECK-LABEL: pairUpBarelyInHalfSextRes0:
; CHECK-NOT: ldur
; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
; CHECK: sxtw x[[DST1]], w[[DST1]]
; CHECK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -63
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i64 -64
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = zext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

; Same addresses again, but only the higher-address value (%tmp1) is
; sign-extended: explicit extension of the second destination register.
define i64 @pairUpBarelyInHalfSextRes1(i32* %a) nounwind ssp {
; CHECK-LABEL: pairUpBarelyInHalfSextRes1:
; CHECK-NOT: ldur
; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
; CHECK: sxtw x[[DST2]], w[[DST2]]
; CHECK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -63
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i64 -64
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = zext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

; i64 elements -32 and -33: byte offsets -256 and -264. No pair load is
; expected here.
define i64 @pairUpBarelyOut(i64* %a) nounwind ssp {
; CHECK-LABEL: pairUpBarelyOut:
; CHECK-NOT: ldp
; Don't be fragile about which loads or manipulations of the base register
; are used---just check that there isn't an ldp before the add
; CHECK: add
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i64, i64* %a, i64 -32
  %tmp1 = load i64, i64* %p1, align 2
  %p2 = getelementptr inbounds i64, i64* %a, i64 -33
  %tmp2 = load i64, i64* %p2, align 2
  %tmp3 = add i64 %tmp1, %tmp2
  ret i64 %tmp3
}

; i32 elements -64 and -65 (byte offsets -256 and -260), both sign-extended.
; No pair load is expected here either.
define i64 @pairUpBarelyOutSext(i32* %a) nounwind ssp {
; CHECK-LABEL: pairUpBarelyOutSext:
; CHECK-NOT: ldp
; Don't be fragile about which loads or manipulations of the base register
; are used---just check that there isn't an ldp before the add
; CHECK: add
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -64
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i64 -65
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

; Each address is an i64 element address plus one byte (byte offsets -143
; and -135), so neither offset is a multiple of the 8-byte access size.
; Two separate unscaled loads are expected instead of a pair.
define i64 @pairUpNotAligned(i64* %a) nounwind ssp {
; CHECK-LABEL: pairUpNotAligned:
; CHECK-NOT: ldp
; CHECK: ldur
; CHECK-NEXT: ldur
; CHECK-NEXT: add
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i64, i64* %a, i64 -18
  %bp1 = bitcast i64* %p1 to i8*
  %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
  %dp1 = bitcast i8* %bp1p1 to i64*
  %tmp1 = load i64, i64* %dp1, align 1

  %p2 = getelementptr inbounds i64, i64* %a, i64 -17
  %bp2 = bitcast i64* %p2 to i8*
  %bp2p1 = getelementptr inbounds i8, i8* %bp2, i64 1
  %dp2 = bitcast i8* %bp2p1 to i64*
  %tmp2 = load i64, i64* %dp2, align 1

  %tmp3 = add i64 %tmp1, %tmp2
  ret i64 %tmp3
}

; Sign-extending variant of the previous test: i32 element addresses plus one
; byte (byte offsets -71 and -67), neither a multiple of the 4-byte access
; size. Two separate sign-extending unscaled loads are expected.
define i64 @pairUpNotAlignedSext(i32* %a) nounwind ssp {
; CHECK-LABEL: pairUpNotAlignedSext:
; CHECK-NOT: ldp
; CHECK: ldursw
; CHECK-NEXT: ldursw
; CHECK-NEXT: add
; CHECK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -18
  %bp1 = bitcast i32* %p1 to i8*
  %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
  %dp1 = bitcast i8* %bp1p1 to i32*
  %tmp1 = load i32, i32* %dp1, align 1

  %p2 = getelementptr inbounds i32, i32* %a, i64 -17
  %bp2 = bitcast i32* %p2 to i8*
  %bp2p1 = getelementptr inbounds i8, i8* %bp2, i64 1
  %dp2 = bitcast i8* %bp2p1 to i32*
  %tmp2 = load i32, i32* %dp2, align 1

  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

; External function that receives a pointer; calling it keeps the computed
; pointer value live in the two tests below.
declare void @use-ptr(i32*)

; The pointer %p + 2 elements (8 bytes) is passed to @use-ptr before the two
; loads from it. The expected sign-extending pair load carries the #8
; displacement in its addressing mode.
; CHECK-LABEL: ldp_sext_int_pre:
; CHECK: ldpsw x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}, #8]
define i64 @ldp_sext_int_pre(i32* %p) nounwind {
  %ptr = getelementptr inbounds i32, i32* %p, i64 2
  call void @use-ptr(i32* %ptr)
  %add.ptr = getelementptr inbounds i32, i32* %ptr, i64 0
  %tmp = load i32, i32* %add.ptr, align 4
  %add.ptr1 = getelementptr inbounds i32, i32* %ptr, i64 1
  %tmp1 = load i32, i32* %add.ptr1, align 4
  %sexttmp = sext i32 %tmp to i64
  %sexttmp1 = sext i32 %tmp1 to i64
  %add = add nsw i64 %sexttmp1, %sexttmp
  ret i64 %add
}

; The two loads come first and the pointer advanced by 2 elements (8 bytes)
; is passed to @use-ptr afterwards. The expected sign-extending pair load
; uses the post-indexed form, which also advances x0 by 8.
; CHECK-LABEL: ldp_sext_int_post:
; CHECK: ldpsw x{{[0-9]+}}, x{{[0-9]+}}, [x0], #8
define i64 @ldp_sext_int_post(i32* %p) nounwind {
  %tmp = load i32, i32* %p, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  %tmp1 = load i32, i32* %add.ptr, align 4
  %sexttmp = sext i32 %tmp to i64
  %sexttmp1 = sext i32 %tmp1 to i64
  %ptr = getelementptr inbounds i32, i32* %add.ptr, i64 1
  call void @use-ptr(i32* %ptr)
  %add = add nsw i64 %sexttmp1, %sexttmp
  ret i64 %add
}