; RUN: llc < %s -march=arm64 -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -march=arm64 -aarch64-unscaled-mem-op=true \
; RUN:   -verify-machineinstrs | FileCheck -check-prefix=LDUR_CHK %s

; Tests that two loads from neighbouring addresses are emitted as a single
; load-pair instruction (ldp / ldpsw).
;
; The first invocation uses the default check prefix and covers the ldp_*
; functions, which load from %p and %p + 1 element. The second invocation
; passes -aarch64-unscaled-mem-op=true and uses the LDUR_CHK prefix; it covers
; the ldur_* and pairUp* functions, which load from negative offsets.
;
; Every function is anchored on its "name:" label so that a pattern can never
; be satisfied by text belonging to another function, or by the function name
; itself (the names ldp_* contain the mnemonic being checked).

; Two adjacent i32 loads -> ldp of two w registers.
; CHECK-LABEL: ldp_int:
; CHECK: ldp w{{[0-9]+}}, w{{[0-9]+}}, [x0]
define i32 @ldp_int(i32* %p) nounwind {
  %tmp = load i32, i32* %p, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  %tmp1 = load i32, i32* %add.ptr, align 4
  %add = add nsw i32 %tmp1, %tmp
  ret i32 %add
}

; Two adjacent i32 loads, both sign-extended to i64 -> ldpsw.
; CHECK-LABEL: ldp_sext_int:
; CHECK: ldpsw x{{[0-9]+}}, x{{[0-9]+}}, [x0]
define i64 @ldp_sext_int(i32* %p) nounwind {
  %tmp = load i32, i32* %p, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  %tmp1 = load i32, i32* %add.ptr, align 4
  %sexttmp = sext i32 %tmp to i64
  %sexttmp1 = sext i32 %tmp1 to i64
  %add = add nsw i64 %sexttmp1, %sexttmp
  ret i64 %add
}

; Only the first loaded value is sign-extended (the second is zero-extended):
; expect a plain ldp followed by an explicit sxtw of the first result.
; CHECK-LABEL: ldp_half_sext_res0_int:
; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0]
; CHECK: sxtw x[[DST1]], w[[DST1]]
define i64 @ldp_half_sext_res0_int(i32* %p) nounwind {
  %tmp = load i32, i32* %p, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  %tmp1 = load i32, i32* %add.ptr, align 4
  %sexttmp = sext i32 %tmp to i64
  %sexttmp1 = zext i32 %tmp1 to i64
  %add = add nsw i64 %sexttmp1, %sexttmp
  ret i64 %add
}

; Only the second loaded value is sign-extended (the first is zero-extended):
; expect a plain ldp followed by an explicit sxtw of the second result.
; CHECK-LABEL: ldp_half_sext_res1_int:
; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0]
; CHECK: sxtw x[[DST2]], w[[DST2]]
define i64 @ldp_half_sext_res1_int(i32* %p) nounwind {
  %tmp = load i32, i32* %p, align 4
  %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
  %tmp1 = load i32, i32* %add.ptr, align 4
  %sexttmp = zext i32 %tmp to i64
  %sexttmp1 = sext i32 %tmp1 to i64
  %add = add nsw i64 %sexttmp1, %sexttmp
  ret i64 %add
}


; Two adjacent i64 loads -> ldp of two x registers.
; CHECK-LABEL: ldp_long:
; CHECK: ldp x{{[0-9]+}}, x{{[0-9]+}}, [x0]
define i64 @ldp_long(i64* %p) nounwind {
  %tmp = load i64, i64* %p, align 8
  %add.ptr = getelementptr inbounds i64, i64* %p, i64 1
  %tmp1 = load i64, i64* %add.ptr, align 8
  %add = add nsw i64 %tmp1, %tmp
  ret i64 %add
}

; Two adjacent float loads -> ldp of two s registers.
; CHECK-LABEL: ldp_float:
; CHECK: ldp s{{[0-9]+}}, s{{[0-9]+}}, [x0]
define float @ldp_float(float* %p) nounwind {
  %tmp = load float, float* %p, align 4
  %add.ptr = getelementptr inbounds float, float* %p, i64 1
  %tmp1 = load float, float* %add.ptr, align 4
  %add = fadd float %tmp, %tmp1
  ret float %add
}

; Two adjacent double loads -> ldp of two d registers.
; CHECK-LABEL: ldp_double:
; CHECK: ldp d{{[0-9]+}}, d{{[0-9]+}}, [x0]
define double @ldp_double(double* %p) nounwind {
  %tmp = load double, double* %p, align 8
  %add.ptr = getelementptr inbounds double, double* %p, i64 1
  %tmp1 = load double, double* %add.ptr, align 8
  %add = fadd double %tmp, %tmp1
  ret double %add
}

; Test the load/store optimizer---combine ldurs into a ldp, if appropriate
; i32 loads at element offsets -1 and -2 -> one ldp at byte offset #-8.
define i32 @ldur_int(i32* %a) nounwind {
; LDUR_CHK-LABEL: ldur_int:
; LDUR_CHK: ldp [[DST1:w[0-9]+]], [[DST2:w[0-9]+]], [x0, #-8]
; LDUR_CHK-NEXT: add w{{[0-9]+}}, [[DST2]], [[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i32 -1
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i32 -2
  %tmp2 = load i32, i32* %p2, align 2
  %tmp3 = add i32 %tmp1, %tmp2
  ret i32 %tmp3
}

; Same as ldur_int, but both values are sign-extended -> ldpsw.
define i64 @ldur_sext_int(i32* %a) nounwind {
; LDUR_CHK-LABEL: ldur_sext_int:
; LDUR_CHK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-8]
; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i32 -1
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i32 -2
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

; Only the value at the lower address (offset -2) is sign-extended:
; expect ldp plus an sxtw of the first destination register.
define i64 @ldur_half_sext_int_res0(i32* %a) nounwind {
; LDUR_CHK-LABEL: ldur_half_sext_int_res0:
; LDUR_CHK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
; LDUR_CHK: sxtw x[[DST1]], w[[DST1]]
; LDUR_CHK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i32 -1
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i32 -2
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = zext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

; Only the value at the higher address (offset -1) is sign-extended:
; expect ldp plus an sxtw of the second destination register.
define i64 @ldur_half_sext_int_res1(i32* %a) nounwind {
; LDUR_CHK-LABEL: ldur_half_sext_int_res1:
; LDUR_CHK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
; LDUR_CHK: sxtw x[[DST2]], w[[DST2]]
; LDUR_CHK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i32 -1
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i32 -2
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = zext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}


; i64 loads at element offsets -1 and -2 -> one ldp at byte offset #-16.
define i64 @ldur_long(i64* %a) nounwind ssp {
; LDUR_CHK-LABEL: ldur_long:
; LDUR_CHK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-16]
; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i64, i64* %a, i64 -1
  %tmp1 = load i64, i64* %p1, align 2
  %p2 = getelementptr inbounds i64, i64* %a, i64 -2
  %tmp2 = load i64, i64* %p2, align 2
  %tmp3 = add i64 %tmp1, %tmp2
  ret i64 %tmp3
}

; float loads at element offsets -1 and -2 -> one ldp at byte offset #-8.
; The sum is a floating-point add, so the mnemonic is matched as fadd.
define float @ldur_float(float* %a) {
; LDUR_CHK-LABEL: ldur_float:
; LDUR_CHK: ldp [[DST1:s[0-9]+]], [[DST2:s[0-9]+]], [x0, #-8]
; LDUR_CHK-NEXT: fadd s{{[0-9]+}}, [[DST2]], [[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds float, float* %a, i64 -1
  %tmp1 = load float, float* %p1, align 2
  %p2 = getelementptr inbounds float, float* %a, i64 -2
  %tmp2 = load float, float* %p2, align 2
  %tmp3 = fadd float %tmp1, %tmp2
  ret float %tmp3
}

; double loads at element offsets -1 and -2 -> one ldp at byte offset #-16.
; The sum is a floating-point add, so the mnemonic is matched as fadd.
define double @ldur_double(double* %a) {
; LDUR_CHK-LABEL: ldur_double:
; LDUR_CHK: ldp [[DST1:d[0-9]+]], [[DST2:d[0-9]+]], [x0, #-16]
; LDUR_CHK-NEXT: fadd d{{[0-9]+}}, [[DST2]], [[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds double, double* %a, i64 -1
  %tmp1 = load double, double* %p1, align 2
  %p2 = getelementptr inbounds double, double* %a, i64 -2
  %tmp2 = load double, double* %p2, align 2
  %tmp3 = fadd double %tmp1, %tmp2
  ret double %tmp3
}

; Now check some boundary conditions
; i64 element offsets -31 and -32: the pair starts at byte offset -256 and is
; still expected to be merged into an ldp.
define i64 @pairUpBarelyIn(i64* %a) nounwind ssp {
; LDUR_CHK-LABEL: pairUpBarelyIn:
; LDUR_CHK-NOT: ldur
; LDUR_CHK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i64, i64* %a, i64 -31
  %tmp1 = load i64, i64* %p1, align 2
  %p2 = getelementptr inbounds i64, i64* %a, i64 -32
  %tmp2 = load i64, i64* %p2, align 2
  %tmp3 = add i64 %tmp1, %tmp2
  ret i64 %tmp3
}

; i32 element offsets -63 and -64 (byte offset -256), both sign-extended:
; still expected to be merged, into an ldpsw.
define i64 @pairUpBarelyInSext(i32* %a) nounwind ssp {
; LDUR_CHK-LABEL: pairUpBarelyInSext:
; LDUR_CHK-NOT: ldur
; LDUR_CHK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -63
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i64 -64
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

; Same offsets as pairUpBarelyInSext, but only the lower-address value is
; sign-extended: expect ldp plus sxtw of the first destination register.
define i64 @pairUpBarelyInHalfSextRes0(i32* %a) nounwind ssp {
; LDUR_CHK-LABEL: pairUpBarelyInHalfSextRes0:
; LDUR_CHK-NOT: ldur
; LDUR_CHK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
; LDUR_CHK: sxtw x[[DST1]], w[[DST1]]
; LDUR_CHK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -63
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i64 -64
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = zext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

; Same offsets as pairUpBarelyInSext, but only the higher-address value is
; sign-extended: expect ldp plus sxtw of the second destination register.
define i64 @pairUpBarelyInHalfSextRes1(i32* %a) nounwind ssp {
; LDUR_CHK-LABEL: pairUpBarelyInHalfSextRes1:
; LDUR_CHK-NOT: ldur
; LDUR_CHK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
; LDUR_CHK: sxtw x[[DST2]], w[[DST2]]
; LDUR_CHK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -63
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i64 -64
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = zext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

; i64 element offsets -32 and -33: one element further out than
; pairUpBarelyIn, and no ldp is expected.
define i64 @pairUpBarelyOut(i64* %a) nounwind ssp {
; LDUR_CHK-LABEL: pairUpBarelyOut:
; LDUR_CHK-NOT: ldp
; Don't be fragile about which loads or manipulations of the base register
; are used---just check that there isn't an ldp before the add
; LDUR_CHK: add
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i64, i64* %a, i64 -32
  %tmp1 = load i64, i64* %p1, align 2
  %p2 = getelementptr inbounds i64, i64* %a, i64 -33
  %tmp2 = load i64, i64* %p2, align 2
  %tmp3 = add i64 %tmp1, %tmp2
  ret i64 %tmp3
}

; i32 element offsets -64 and -65, both sign-extended: one element further
; out than pairUpBarelyInSext, and no ldp/ldpsw is expected.
define i64 @pairUpBarelyOutSext(i32* %a) nounwind ssp {
; LDUR_CHK-LABEL: pairUpBarelyOutSext:
; LDUR_CHK-NOT: ldp
; Don't be fragile about which loads or manipulations of the base register
; are used---just check that there isn't an ldp before the add
; LDUR_CHK: add
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -64
  %tmp1 = load i32, i32* %p1, align 2
  %p2 = getelementptr inbounds i32, i32* %a, i64 -65
  %tmp2 = load i32, i32* %p2, align 2
  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}

; Both i64 addresses are bumped by one byte through an i8 GEP, so the byte
; offsets are not multiples of the element size: expect two ldur, no ldp.
define i64 @pairUpNotAligned(i64* %a) nounwind ssp {
; LDUR_CHK-LABEL: pairUpNotAligned:
; LDUR_CHK-NOT: ldp
; LDUR_CHK: ldur
; LDUR_CHK-NEXT: ldur
; LDUR_CHK-NEXT: add
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i64, i64* %a, i64 -18
  %bp1 = bitcast i64* %p1 to i8*
  %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
  %dp1 = bitcast i8* %bp1p1 to i64*
  %tmp1 = load i64, i64* %dp1, align 1

  %p2 = getelementptr inbounds i64, i64* %a, i64 -17
  %bp2 = bitcast i64* %p2 to i8*
  %bp2p1 = getelementptr inbounds i8, i8* %bp2, i64 1
  %dp2 = bitcast i8* %bp2p1 to i64*
  %tmp2 = load i64, i64* %dp2, align 1

  %tmp3 = add i64 %tmp1, %tmp2
  ret i64 %tmp3
}

; i32 variant of pairUpNotAligned with both values sign-extended:
; expect two ldursw, no ldp/ldpsw.
define i64 @pairUpNotAlignedSext(i32* %a) nounwind ssp {
; LDUR_CHK-LABEL: pairUpNotAlignedSext:
; LDUR_CHK-NOT: ldp
; LDUR_CHK: ldursw
; LDUR_CHK-NEXT: ldursw
; LDUR_CHK-NEXT: add
; LDUR_CHK-NEXT: ret
  %p1 = getelementptr inbounds i32, i32* %a, i64 -18
  %bp1 = bitcast i32* %p1 to i8*
  %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
  %dp1 = bitcast i8* %bp1p1 to i32*
  %tmp1 = load i32, i32* %dp1, align 1

  %p2 = getelementptr inbounds i32, i32* %a, i64 -17
  %bp2 = bitcast i32* %p2 to i8*
  %bp2p1 = getelementptr inbounds i8, i8* %bp2, i64 1
  %dp2 = bitcast i8* %bp2p1 to i32*
  %tmp2 = load i32, i32* %dp2, align 1

  %sexttmp1 = sext i32 %tmp1 to i64
  %sexttmp2 = sext i32 %tmp2 to i64
  %tmp3 = add i64 %sexttmp1, %sexttmp2
  ret i64 %tmp3
}