; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
;
; <rdar://problem/14486451>

%struct.a = type [256 x i16]
%struct.b = type [256 x i32]
%struct.c = type [256 x i64]
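
; The first group of functions below indexes a 256-entry table with bits
; 16:9 of the i32 argument. A rough C analogue of the pattern (illustrative
; only, not taken from the original source):
;
;   uint16_t load_halfword(uint16_t ctx[256], uint32_t xor72) {
;     return ctx[(xor72 >> 9) & 255];
;   }
;
; The lshr/and of the index should be selected as a single ubfx, and the
; scaled index should then fold into the load/store addressing mode as a
; register offset with an lsl matching the access size.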

define i16 @load_halfword(%struct.a* %ctx, i32 %xor72) nounwind {
; CHECK-LABEL: load_halfword:
; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
; CHECK: ldrh w0, [x0, [[REG]], lsl #1]
  %shr81 = lshr i32 %xor72, 9
  %conv82 = zext i32 %shr81 to i64
  %idxprom83 = and i64 %conv82, 255
  %arrayidx86 = getelementptr inbounds %struct.a, %struct.a* %ctx, i64 0, i64 %idxprom83
  %result = load i16, i16* %arrayidx86, align 2
  ret i16 %result
}

define i32 @load_word(%struct.b* %ctx, i32 %xor72) nounwind {
; CHECK-LABEL: load_word:
; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
; CHECK: ldr w0, [x0, [[REG]], lsl #2]
  %shr81 = lshr i32 %xor72, 9
  %conv82 = zext i32 %shr81 to i64
  %idxprom83 = and i64 %conv82, 255
  %arrayidx86 = getelementptr inbounds %struct.b, %struct.b* %ctx, i64 0, i64 %idxprom83
  %result = load i32, i32* %arrayidx86, align 4
  ret i32 %result
}

define i64 @load_doubleword(%struct.c* %ctx, i32 %xor72) nounwind {
; CHECK-LABEL: load_doubleword:
; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
; CHECK: ldr x0, [x0, [[REG]], lsl #3]
  %shr81 = lshr i32 %xor72, 9
  %conv82 = zext i32 %shr81 to i64
  %idxprom83 = and i64 %conv82, 255
  %arrayidx86 = getelementptr inbounds %struct.c, %struct.c* %ctx, i64 0, i64 %idxprom83
  %result = load i64, i64* %arrayidx86, align 8
  ret i64 %result
}

define void @store_halfword(%struct.a* %ctx, i32 %xor72, i16 %val) nounwind {
; CHECK-LABEL: store_halfword:
; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
; CHECK: strh w2, [x0, [[REG]], lsl #1]
  %shr81 = lshr i32 %xor72, 9
  %conv82 = zext i32 %shr81 to i64
  %idxprom83 = and i64 %conv82, 255
  %arrayidx86 = getelementptr inbounds %struct.a, %struct.a* %ctx, i64 0, i64 %idxprom83
  store i16 %val, i16* %arrayidx86, align 8
  ret void
}

define void @store_word(%struct.b* %ctx, i32 %xor72, i32 %val) nounwind {
; CHECK-LABEL: store_word:
; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
; CHECK: str w2, [x0, [[REG]], lsl #2]
  %shr81 = lshr i32 %xor72, 9
  %conv82 = zext i32 %shr81 to i64
  %idxprom83 = and i64 %conv82, 255
  %arrayidx86 = getelementptr inbounds %struct.b, %struct.b* %ctx, i64 0, i64 %idxprom83
  store i32 %val, i32* %arrayidx86, align 8
  ret void
}

define void @store_doubleword(%struct.c* %ctx, i32 %xor72, i64 %val) nounwind {
; CHECK-LABEL: store_doubleword:
; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
; CHECK: str x2, [x0, [[REG]], lsl #3]
  %shr81 = lshr i32 %xor72, 9
  %conv82 = zext i32 %shr81 to i64
  %idxprom83 = and i64 %conv82, 255
  %arrayidx86 = getelementptr inbounds %struct.c, %struct.c* %ctx, i64 0, i64 %idxprom83
  store i64 %val, i64* %arrayidx86, align 8
  ret void
}

; Check that we combine a shift into the offset instead of using a narrower load
; when we have a load followed by a trunc.
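
; Informally: truncating the result of a 64-bit load is free, since the low
; 32 bits are simply the w sub-register of the x result, so keeping the wide
; load lets the shift fold into the addressing mode:
;
;   ldr x0, [x0, x1, lsl #3]    ; one instruction, shift folded
;
; instead of shifting separately for a narrower load (register numbers
; illustrative):
;
;   lsl x8, x1, #3
;   ldr w0, [x0, x8]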

define i32 @load_doubleword_trunc_word(i64* %ptr, i64 %off) {
; CHECK-LABEL: load_doubleword_trunc_word:
; CHECK: ldr x0, [x0, x1, lsl #3]
entry:
  %idx = getelementptr inbounds i64, i64* %ptr, i64 %off
  %x = load i64, i64* %idx, align 8
  %trunc = trunc i64 %x to i32
  ret i32 %trunc
}

define i16 @load_doubleword_trunc_halfword(i64* %ptr, i64 %off) {
; CHECK-LABEL: load_doubleword_trunc_halfword:
; CHECK: ldr x0, [x0, x1, lsl #3]
entry:
  %idx = getelementptr inbounds i64, i64* %ptr, i64 %off
  %x = load i64, i64* %idx, align 8
  %trunc = trunc i64 %x to i16
  ret i16 %trunc
}

define i8 @load_doubleword_trunc_byte(i64* %ptr, i64 %off) {
; CHECK-LABEL: load_doubleword_trunc_byte:
; CHECK: ldr x0, [x0, x1, lsl #3]
entry:
  %idx = getelementptr inbounds i64, i64* %ptr, i64 %off
  %x = load i64, i64* %idx, align 8
  %trunc = trunc i64 %x to i8
  ret i8 %trunc
}

define i16 @load_word_trunc_halfword(i32* %ptr, i64 %off) {
; CHECK-LABEL: load_word_trunc_halfword:
; CHECK: ldr w0, [x0, x1, lsl #2]
entry:
  %idx = getelementptr inbounds i32, i32* %ptr, i64 %off
  %x = load i32, i32* %idx, align 8
  %trunc = trunc i32 %x to i16
  ret i16 %trunc
}

define i8 @load_word_trunc_byte(i32* %ptr, i64 %off) {
; CHECK-LABEL: load_word_trunc_byte:
; CHECK: ldr w0, [x0, x1, lsl #2]
entry:
  %idx = getelementptr inbounds i32, i32* %ptr, i64 %off
  %x = load i32, i32* %idx, align 8
  %trunc = trunc i32 %x to i8
  ret i8 %trunc
}

define i8 @load_halfword_trunc_byte(i16* %ptr, i64 %off) {
; CHECK-LABEL: load_halfword_trunc_byte:
; CHECK: ldrh w0, [x0, x1, lsl #1]
entry:
  %idx = getelementptr inbounds i16, i16* %ptr, i64 %off
  %x = load i16, i16* %idx, align 8
  %trunc = trunc i16 %x to i8
  ret i8 %trunc
}

; Check that we do use a narrower load, and so don't combine the shift, when
; the loaded value is zero-extended.
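
; Informally: ldrb, ldrh and the 32-bit ldr already zero-extend into the full
; 64-bit register, so a narrower load absorbs the zext. The shift can no
; longer fold into that narrower access (its scale would not match the access
; size), so it is emitted as a separate lsl, e.g. (register number
; illustrative):
;
;   lsl x8, x1, #3
;   ldr w0, [x0, x8]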

define i64 @load_doubleword_trunc_word_zext(i64* %ptr, i64 %off) {
; CHECK-LABEL: load_doubleword_trunc_word_zext:
; CHECK: lsl [[REG:x[0-9]+]], x1, #3
; CHECK: ldr w0, [x0, [[REG]]]
entry:
  %idx = getelementptr inbounds i64, i64* %ptr, i64 %off
  %x = load i64, i64* %idx, align 8
  %trunc = trunc i64 %x to i32
  %ext = zext i32 %trunc to i64
  ret i64 %ext
}

define i64 @load_doubleword_trunc_halfword_zext(i64* %ptr, i64 %off) {
; CHECK-LABEL: load_doubleword_trunc_halfword_zext:
; CHECK: lsl [[REG:x[0-9]+]], x1, #3
; CHECK: ldrh w0, [x0, [[REG]]]
entry:
  %idx = getelementptr inbounds i64, i64* %ptr, i64 %off
  %x = load i64, i64* %idx, align 8
  %trunc = trunc i64 %x to i16
  %ext = zext i16 %trunc to i64
  ret i64 %ext
}

define i64 @load_doubleword_trunc_byte_zext(i64* %ptr, i64 %off) {
; CHECK-LABEL: load_doubleword_trunc_byte_zext:
; CHECK: lsl [[REG:x[0-9]+]], x1, #3
; CHECK: ldrb w0, [x0, [[REG]]]
entry:
  %idx = getelementptr inbounds i64, i64* %ptr, i64 %off
  %x = load i64, i64* %idx, align 8
  %trunc = trunc i64 %x to i8
  %ext = zext i8 %trunc to i64
  ret i64 %ext
}

define i64 @load_word_trunc_halfword_zext(i32* %ptr, i64 %off) {
; CHECK-LABEL: load_word_trunc_halfword_zext:
; CHECK: lsl [[REG:x[0-9]+]], x1, #2
; CHECK: ldrh w0, [x0, [[REG]]]
entry:
  %idx = getelementptr inbounds i32, i32* %ptr, i64 %off
  %x = load i32, i32* %idx, align 8
  %trunc = trunc i32 %x to i16
  %ext = zext i16 %trunc to i64
  ret i64 %ext
}

define i64 @load_word_trunc_byte_zext(i32* %ptr, i64 %off) {
; CHECK-LABEL: load_word_trunc_byte_zext:
; CHECK: lsl [[REG:x[0-9]+]], x1, #2
; CHECK: ldrb w0, [x0, [[REG]]]
entry:
  %idx = getelementptr inbounds i32, i32* %ptr, i64 %off
  %x = load i32, i32* %idx, align 8
  %trunc = trunc i32 %x to i8
  %ext = zext i8 %trunc to i64
  ret i64 %ext
}

define i64 @load_halfword_trunc_byte_zext(i16* %ptr, i64 %off) {
; CHECK-LABEL: load_halfword_trunc_byte_zext:
; CHECK: lsl [[REG:x[0-9]+]], x1, #1
; CHECK: ldrb w0, [x0, [[REG]]]
entry:
  %idx = getelementptr inbounds i16, i16* %ptr, i64 %off
  %x = load i16, i16* %idx, align 8
  %trunc = trunc i16 %x to i8
  %ext = zext i8 %trunc to i64
  ret i64 %ext
}

; Check that we do use a narrower load, and so don't combine the shift, when
; the loaded value is sign-extended. Here the sext is absorbed by the
; sign-extending loads ldrsb/ldrsh/ldrsw.

define i64 @load_doubleword_trunc_word_sext(i64* %ptr, i64 %off) {
; CHECK-LABEL: load_doubleword_trunc_word_sext:
; CHECK: lsl [[REG:x[0-9]+]], x1, #3
; CHECK: ldrsw x0, [x0, [[REG]]]
entry:
  %idx = getelementptr inbounds i64, i64* %ptr, i64 %off
  %x = load i64, i64* %idx, align 8
  %trunc = trunc i64 %x to i32
  %ext = sext i32 %trunc to i64
  ret i64 %ext
}

define i64 @load_doubleword_trunc_halfword_sext(i64* %ptr, i64 %off) {
; CHECK-LABEL: load_doubleword_trunc_halfword_sext:
; CHECK: lsl [[REG:x[0-9]+]], x1, #3
; CHECK: ldrsh x0, [x0, [[REG]]]
entry:
  %idx = getelementptr inbounds i64, i64* %ptr, i64 %off
  %x = load i64, i64* %idx, align 8
  %trunc = trunc i64 %x to i16
  %ext = sext i16 %trunc to i64
  ret i64 %ext
}

define i64 @load_doubleword_trunc_byte_sext(i64* %ptr, i64 %off) {
; CHECK-LABEL: load_doubleword_trunc_byte_sext:
; CHECK: lsl [[REG:x[0-9]+]], x1, #3
; CHECK: ldrsb x0, [x0, [[REG]]]
entry:
  %idx = getelementptr inbounds i64, i64* %ptr, i64 %off
  %x = load i64, i64* %idx, align 8
  %trunc = trunc i64 %x to i8
  %ext = sext i8 %trunc to i64
  ret i64 %ext
}

define i64 @load_word_trunc_halfword_sext(i32* %ptr, i64 %off) {
; CHECK-LABEL: load_word_trunc_halfword_sext:
; CHECK: lsl [[REG:x[0-9]+]], x1, #2
; CHECK: ldrsh x0, [x0, [[REG]]]
entry:
  %idx = getelementptr inbounds i32, i32* %ptr, i64 %off
  %x = load i32, i32* %idx, align 8
  %trunc = trunc i32 %x to i16
  %ext = sext i16 %trunc to i64
  ret i64 %ext
}

define i64 @load_word_trunc_byte_sext(i32* %ptr, i64 %off) {
; CHECK-LABEL: load_word_trunc_byte_sext:
; CHECK: lsl [[REG:x[0-9]+]], x1, #2
; CHECK: ldrsb x0, [x0, [[REG]]]
entry:
  %idx = getelementptr inbounds i32, i32* %ptr, i64 %off
  %x = load i32, i32* %idx, align 8
  %trunc = trunc i32 %x to i8
  %ext = sext i8 %trunc to i64
  ret i64 %ext
}

define i64 @load_halfword_trunc_byte_sext(i16* %ptr, i64 %off) {
; CHECK-LABEL: load_halfword_trunc_byte_sext:
; CHECK: lsl [[REG:x[0-9]+]], x1, #1
; CHECK: ldrsb x0, [x0, [[REG]]]
entry:
  %idx = getelementptr inbounds i16, i16* %ptr, i64 %off
  %x = load i16, i16* %idx, align 8
  %trunc = trunc i16 %x to i8
  %ext = sext i8 %trunc to i64
  ret i64 %ext
}

; Check that we don't combine the shift, and so will use a narrower load, when
; the shift is used more than once. Folding the shift into the addressing mode
; would not remove it, since its value is still needed by the add, so it is
; computed once with lsl and reused.

define i32 @load_doubleword_trunc_word_reuse_shift(i64* %ptr, i64 %off) {
; CHECK-LABEL: load_doubleword_trunc_word_reuse_shift:
; CHECK: lsl x[[REG1:[0-9]+]], x1, #3
; CHECK: ldr w[[REG2:[0-9]+]], [x0, x[[REG1]]]
; CHECK: add w0, w[[REG2]], w[[REG1]]
entry:
  %idx = getelementptr inbounds i64, i64* %ptr, i64 %off
  %x = load i64, i64* %idx, align 8
  %trunc = trunc i64 %x to i32
  %lsl = shl i64 %off, 3
  %lsl.trunc = trunc i64 %lsl to i32
  %add = add i32 %trunc, %lsl.trunc
  ret i32 %add
}

define i16 @load_doubleword_trunc_halfword_reuse_shift(i64* %ptr, i64 %off) {
; CHECK-LABEL: load_doubleword_trunc_halfword_reuse_shift:
; CHECK: lsl x[[REG1:[0-9]+]], x1, #3
; CHECK: ldrh w[[REG2:[0-9]+]], [x0, x[[REG1]]]
; CHECK: add w0, w[[REG2]], w[[REG1]]
entry:
  %idx = getelementptr inbounds i64, i64* %ptr, i64 %off
  %x = load i64, i64* %idx, align 8
  %trunc = trunc i64 %x to i16
  %lsl = shl i64 %off, 3
  %lsl.trunc = trunc i64 %lsl to i16
  %add = add i16 %trunc, %lsl.trunc
  ret i16 %add
}

define i8 @load_doubleword_trunc_byte_reuse_shift(i64* %ptr, i64 %off) {
; CHECK-LABEL: load_doubleword_trunc_byte_reuse_shift:
; CHECK: lsl x[[REG1:[0-9]+]], x1, #3
; CHECK: ldrb w[[REG2:[0-9]+]], [x0, x[[REG1]]]
; CHECK: add w0, w[[REG2]], w[[REG1]]
entry:
  %idx = getelementptr inbounds i64, i64* %ptr, i64 %off
  %x = load i64, i64* %idx, align 8
  %trunc = trunc i64 %x to i8
  %lsl = shl i64 %off, 3
  %lsl.trunc = trunc i64 %lsl to i8
  %add = add i8 %trunc, %lsl.trunc
  ret i8 %add
}

define i16 @load_word_trunc_halfword_reuse_shift(i32* %ptr, i64 %off) {
; CHECK-LABEL: load_word_trunc_halfword_reuse_shift:
; CHECK: lsl x[[REG1:[0-9]+]], x1, #2
; CHECK: ldrh w[[REG2:[0-9]+]], [x0, x[[REG1]]]
; CHECK: add w0, w[[REG2]], w[[REG1]]
entry:
  %idx = getelementptr inbounds i32, i32* %ptr, i64 %off
  %x = load i32, i32* %idx, align 8
  %trunc = trunc i32 %x to i16
  %lsl = shl i64 %off, 2
  %lsl.trunc = trunc i64 %lsl to i16
  %add = add i16 %trunc, %lsl.trunc
  ret i16 %add
}

define i8 @load_word_trunc_byte_reuse_shift(i32* %ptr, i64 %off) {
; CHECK-LABEL: load_word_trunc_byte_reuse_shift:
; CHECK: lsl x[[REG1:[0-9]+]], x1, #2
; CHECK: ldrb w[[REG2:[0-9]+]], [x0, x[[REG1]]]
; CHECK: add w0, w[[REG2]], w[[REG1]]
entry:
  %idx = getelementptr inbounds i32, i32* %ptr, i64 %off
  %x = load i32, i32* %idx, align 8
  %trunc = trunc i32 %x to i8
  %lsl = shl i64 %off, 2
  %lsl.trunc = trunc i64 %lsl to i8
  %add = add i8 %trunc, %lsl.trunc
  ret i8 %add
}

define i8 @load_halfword_trunc_byte_reuse_shift(i16* %ptr, i64 %off) {
; CHECK-LABEL: load_halfword_trunc_byte_reuse_shift:
; CHECK: lsl x[[REG1:[0-9]+]], x1, #1
; CHECK: ldrb w[[REG2:[0-9]+]], [x0, x[[REG1]]]
; CHECK: add w0, w[[REG2]], w[[REG1]]
entry:
  %idx = getelementptr inbounds i16, i16* %ptr, i64 %off
  %x = load i16, i16* %idx, align 8
  %trunc = trunc i16 %x to i8
  %lsl = shl i64 %off, 1
  %lsl.trunc = trunc i64 %lsl to i8
  %add = add i8 %trunc, %lsl.trunc
  ret i8 %add
}