; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=BSWAP
; RUN: llc < %s -mtriple=i686-unknown -mattr=+movbe | FileCheck %s --check-prefix=CHECK --check-prefix=MOVBE
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK64 --check-prefix=BSWAP64
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+movbe | FileCheck %s --check-prefix=CHECK64 --check-prefix=MOVBE64

; The tests below assemble a wider integer from adjacent narrower loads. The
; assertions record whether llc emits one wide load (plus a byte swap when the
; pieces are combined in reversed order).
;
; Shifts that move a zero-extended value into the most significant byte/half
; carry only nuw: with nsw the result would be poison whenever the top bit of
; the loaded value is set.

; Little-endian order, one byte at a time; expected to become a single movl.
; i8* p;
; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
define i32 @load_i32_by_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl (%eax), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl (%rdi), %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 8
  %tmp7 = or i32 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i32
  %tmp11 = shl nuw nsw i32 %tmp10, 16
  %tmp12 = or i32 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i32
  ; no nsw: the byte's top bit becomes the sign bit
  %tmp16 = shl nuw i32 %tmp15, 24
  %tmp17 = or i32 %tmp12, %tmp16
  ret i32 %tmp17
}

; Big-endian order, one byte at a time; expected to become a load followed by
; bswapl, or a single movbel when +movbe is enabled.
; i8* p;
; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap(i32* %arg) {
; BSWAP-LABEL: load_i32_by_i8_bswap:
; BSWAP:       # %bb.0:
; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT:    movl (%eax), %eax
; BSWAP-NEXT:    bswapl %eax
; BSWAP-NEXT:    retl
;
; MOVBE-LABEL: load_i32_by_i8_bswap:
; MOVBE:       # %bb.0:
; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT:    movbel (%eax), %eax
; MOVBE-NEXT:    retl
;
; BSWAP64-LABEL: load_i32_by_i8_bswap:
; BSWAP64:       # %bb.0:
; BSWAP64-NEXT:    movl (%rdi), %eax
; BSWAP64-NEXT:    bswapl %eax
; BSWAP64-NEXT:    retq
;
; MOVBE64-LABEL: load_i32_by_i8_bswap:
; MOVBE64:       # %bb.0:
; MOVBE64-NEXT:    movbel (%rdi), %eax
; MOVBE64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  ; no nsw: the byte's top bit becomes the sign bit
  %tmp3 = shl nuw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}

; Little-endian order, two 16-bit halves; expected to become a single movl.
; i16* p;
; (i32) p[0] | ((i32) p[1] << 16)
define i32 @load_i32_by_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl (%eax), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i16:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl (%rdi), %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 1
  %tmp2 = zext i16 %tmp1 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp5 = zext i16 %tmp4 to i32
  ; no nsw: the half's top bit becomes the sign bit
  %tmp6 = shl nuw i32 %tmp5, 16
  %tmp7 = or i32 %tmp6, %tmp2
  ret i32 %tmp7
}

; i16* p_16;
; i8* p_8 = (i8*) p_16;
; (i32) p_16[0] | ((i32) p_8[2] << 16) | ((i32) p_8[3] << 24)
define i32 @load_i32_by_i16_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl (%eax), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i16_i8:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl (%rdi), %eax
; CHECK64-NEXT:    retq
  ; Low half comes from one i16 load, the upper two bytes from i8 loads at
  ; byte offsets 2 and 3 of the same base pointer.
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = bitcast i32* %arg to i8*
  %tmp2 = load i16, i16* %tmp, align 1
  %tmp3 = zext i16 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp1, i32 2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = getelementptr inbounds i8, i8* %tmp1, i32 3
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i32
  ; nuw only: with nsw the shift would be poison whenever the byte is >= 0x80,
  ; because its top bit becomes the i32 sign bit.
  %tmp11 = shl nuw i32 %tmp10, 24
  %tmp12 = or i32 %tmp7, %tmp11
  %tmp13 = or i32 %tmp12, %tmp3
  ret i32 %tmp13
}


; Two little-endian i16 values are each built from two bytes, then combined
; into an i32; expected to become a single movl.
; i8* p;
; (i32) ((i16) p[0] | ((i16) p[1] << 8)) | ((i32) ((i16) p[2] | ((i16) p[3] << 8)) << 16)
define i32 @load_i32_by_i16_by_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_by_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl (%eax), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i16_by_i8:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl (%rdi), %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i16
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i16
  ; no nsw: the byte's top bit becomes the i16 sign bit
  %tmp6 = shl nuw i16 %tmp5, 8
  %tmp7 = or i16 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i16
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i16
  ; no nsw: the byte's top bit becomes the i16 sign bit
  %tmp14 = shl nuw i16 %tmp13, 8
  %tmp15 = or i16 %tmp14, %tmp10
  %tmp16 = zext i16 %tmp7 to i32
  %tmp17 = zext i16 %tmp15 to i32
  ; no nsw: the half's top bit becomes the i32 sign bit
  %tmp18 = shl nuw i32 %tmp17, 16
  %tmp19 = or i32 %tmp18, %tmp16
  ret i32 %tmp19
}

; Same two-level construction in big-endian order; expected to become a load
; followed by bswapl, or a single movbel when +movbe is enabled.
; i8* p;
; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[2] << 8) | (i16) p[3])
define i32 @load_i32_by_i16_by_i8_bswap(i32* %arg) {
; BSWAP-LABEL: load_i32_by_i16_by_i8_bswap:
; BSWAP:       # %bb.0:
; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT:    movl (%eax), %eax
; BSWAP-NEXT:    bswapl %eax
; BSWAP-NEXT:    retl
;
; MOVBE-LABEL: load_i32_by_i16_by_i8_bswap:
; MOVBE:       # %bb.0:
; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT:    movbel (%eax), %eax
; MOVBE-NEXT:    retl
;
; BSWAP64-LABEL: load_i32_by_i16_by_i8_bswap:
; BSWAP64:       # %bb.0:
; BSWAP64-NEXT:    movl (%rdi), %eax
; BSWAP64-NEXT:    bswapl %eax
; BSWAP64-NEXT:    retq
;
; MOVBE64-LABEL: load_i32_by_i16_by_i8_bswap:
; MOVBE64:       # %bb.0:
; MOVBE64-NEXT:    movbel (%rdi), %eax
; MOVBE64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i16
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i16
  ; no nsw: the byte's top bit becomes the i16 sign bit
  %tmp6 = shl nuw i16 %tmp2, 8
  %tmp7 = or i16 %tmp6, %tmp5
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i16
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i16
  ; no nsw: the byte's top bit becomes the i16 sign bit
  %tmp14 = shl nuw i16 %tmp10, 8
  %tmp15 = or i16 %tmp14, %tmp13
  %tmp16 = zext i16 %tmp7 to i32
  %tmp17 = zext i16 %tmp15 to i32
  ; no nsw: the half's top bit becomes the i32 sign bit
  %tmp18 = shl nuw i32 %tmp16, 16
  %tmp19 = or i32 %tmp18, %tmp17
  ret i32 %tmp19
}

; i8* p;
; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) |
; ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
; Eight bytes in little-endian order; expected to become two movl on i686 and
; a single movq on x86_64.
define i64 @load_i64_by_i8(i64* %arg) {
; CHECK-LABEL: load_i64_by_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl (%ecx), %eax
; CHECK-NEXT:    movl 4(%ecx), %edx
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i64_by_i8:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movq (%rdi), %rax
; CHECK64-NEXT:    retq
  %tmp = bitcast i64* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i64 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i64
  %tmp6 = shl nuw nsw i64 %tmp5, 8
  %tmp7 = or i64 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i64 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i64
  %tmp11 = shl nuw nsw i64 %tmp10, 16
  %tmp12 = or i64 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i64 3
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i64
  %tmp16 = shl nuw nsw i64 %tmp15, 24
  %tmp17 = or i64 %tmp12, %tmp16
  %tmp18 = getelementptr inbounds i8, i8* %tmp, i64 4
  %tmp19 = load i8, i8* %tmp18, align 1
  %tmp20 = zext i8 %tmp19 to i64
  %tmp21 = shl nuw nsw i64 %tmp20, 32
  %tmp22 = or i64 %tmp17, %tmp21
  %tmp23 = getelementptr inbounds i8, i8* %tmp, i64 5
  %tmp24 = load i8, i8* %tmp23, align 1
  %tmp25 = zext i8 %tmp24 to i64
  %tmp26 = shl nuw nsw i64 %tmp25, 40
  %tmp27 = or i64 %tmp22, %tmp26
  %tmp28 = getelementptr inbounds i8, i8* %tmp, i64 6
  %tmp29 = load i8, i8* %tmp28, align 1
  %tmp30 = zext i8 %tmp29 to i64
  %tmp31 = shl nuw nsw i64 %tmp30, 48
  %tmp32 = or i64 %tmp27, %tmp31
  %tmp33 = getelementptr inbounds i8, i8* %tmp, i64 7
  %tmp34 = load i8, i8* %tmp33, align 1
  %tmp35 = zext i8 %tmp34 to i64
  %tmp36 = shl nuw i64 %tmp35, 56
  %tmp37 = or i64 %tmp32, %tmp36
  ret i64 %tmp37
}

; Eight bytes in big-endian order; expected to become loads plus bswap, or
; movbe loads when +movbe is enabled.
; i8* p;
; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
define i64 @load_i64_by_i8_bswap(i64* %arg) {
; BSWAP-LABEL: load_i64_by_i8_bswap:
; BSWAP:       # %bb.0:
; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT:    movl (%eax), %edx
; BSWAP-NEXT:    movl 4(%eax), %eax
; BSWAP-NEXT:    bswapl %eax
; BSWAP-NEXT:    bswapl %edx
; BSWAP-NEXT:    retl
;
; MOVBE-LABEL: load_i64_by_i8_bswap:
; MOVBE:       # %bb.0:
; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; MOVBE-NEXT:    movbel 4(%ecx), %eax
; MOVBE-NEXT:    movbel (%ecx), %edx
; MOVBE-NEXT:    retl
;
; BSWAP64-LABEL: load_i64_by_i8_bswap:
; BSWAP64:       # %bb.0:
; BSWAP64-NEXT:    movq (%rdi), %rax
; BSWAP64-NEXT:    bswapq %rax
; BSWAP64-NEXT:    retq
;
; MOVBE64-LABEL: load_i64_by_i8_bswap:
; MOVBE64:       # %bb.0:
; MOVBE64-NEXT:    movbeq (%rdi), %rax
; MOVBE64-NEXT:    retq
  %tmp = bitcast i64* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = shl nuw i64 %tmp2, 56
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i64 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i64
  %tmp7 = shl nuw nsw i64 %tmp6, 48
  %tmp8 = or i64 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i64 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i64
  %tmp12 = shl nuw nsw i64 %tmp11, 40
  %tmp13 = or i64 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i64 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i64
  %tmp17 = shl nuw nsw i64 %tmp16, 32
  %tmp18 = or i64 %tmp13, %tmp17
  %tmp19 = getelementptr inbounds i8, i8* %tmp, i64 4
  %tmp20 = load i8, i8* %tmp19, align 1
  %tmp21 = zext i8 %tmp20 to i64
  %tmp22 = shl nuw nsw i64 %tmp21, 24
  %tmp23 = or i64 %tmp18, %tmp22
  %tmp24 = getelementptr inbounds i8, i8* %tmp, i64 5
  %tmp25 = load i8, i8* %tmp24, align 1
  %tmp26 = zext i8 %tmp25 to i64
  %tmp27 = shl nuw nsw i64 %tmp26, 16
  %tmp28 = or i64 %tmp23, %tmp27
  %tmp29 = getelementptr inbounds i8, i8* %tmp, i64 6
  %tmp30 = load i8, i8* %tmp29, align 1
  %tmp31 = zext i8 %tmp30 to i64
  %tmp32 = shl nuw nsw i64 %tmp31, 8
  %tmp33 = or i64 %tmp28, %tmp32
  %tmp34 = getelementptr inbounds i8, i8* %tmp, i64 7
  %tmp35 = load i8, i8* %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i64
  %tmp37 = or i64 %tmp33, %tmp36
  ret i64 %tmp37
}

; Part of the load by bytes pattern is used outside of the pattern
; (the assertions show the byte loads are kept separate).
; i8* p;
; i32 x = (i32) p[1]
; res = ((i32) p[0] << 24) | (x << 16) | ((i32) p[2] << 8) | (i32) p[3]
; x | res
define i32 @load_i32_by_i8_bswap_uses(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap_uses:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    .cfi_offset %esi, -8
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movzbl (%eax), %ecx
; CHECK-NEXT:    shll $24, %ecx
; CHECK-NEXT:    movzbl 1(%eax), %edx
; CHECK-NEXT:    movl %edx, %esi
; CHECK-NEXT:    shll $16, %esi
; CHECK-NEXT:    orl %ecx, %esi
; CHECK-NEXT:    movzbl 2(%eax), %ecx
; CHECK-NEXT:    shll $8, %ecx
; CHECK-NEXT:    orl %esi, %ecx
; CHECK-NEXT:    movzbl 3(%eax), %eax
; CHECK-NEXT:    orl %ecx, %eax
; CHECK-NEXT:    orl %edx, %eax
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    .cfi_def_cfa_offset 4
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_uses:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl (%rdi), %eax
; CHECK64-NEXT:    shll $24, %eax
; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
; CHECK64-NEXT:    movl %ecx, %edx
; CHECK64-NEXT:    shll $16, %edx
; CHECK64-NEXT:    orl %eax, %edx
; CHECK64-NEXT:    movzbl 2(%rdi), %esi
; CHECK64-NEXT:    shll $8, %esi
; CHECK64-NEXT:    orl %edx, %esi
; CHECK64-NEXT:    movzbl 3(%rdi), %eax
; CHECK64-NEXT:    orl %esi, %eax
; CHECK64-NEXT:    orl %ecx, %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  ; nuw only: with nsw the shift would be poison whenever the byte is >= 0x80,
  ; because its top bit becomes the i32 sign bit.
  %tmp3 = shl nuw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ; Use individual part of the pattern outside of the pattern
  %tmp18 = or i32 %tmp6, %tmp17
  ret i32 %tmp18
}

; One of the loads is volatile
; (the assertions show the byte loads are kept separate).
; i8* p;
; p0 = volatile *p;
; ((i32) p0 << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap_volatile(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap_volatile:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movzbl (%eax), %ecx
; CHECK-NEXT:    shll $24, %ecx
; CHECK-NEXT:    movzbl 1(%eax), %edx
; CHECK-NEXT:    shll $16, %edx
; CHECK-NEXT:    orl %ecx, %edx
; CHECK-NEXT:    movzbl 2(%eax), %ecx
; CHECK-NEXT:    shll $8, %ecx
; CHECK-NEXT:    orl %edx, %ecx
; CHECK-NEXT:    movzbl 3(%eax), %eax
; CHECK-NEXT:    orl %ecx, %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_volatile:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl (%rdi), %eax
; CHECK64-NEXT:    shll $24, %eax
; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
; CHECK64-NEXT:    shll $16, %ecx
; CHECK64-NEXT:    orl %eax, %ecx
; CHECK64-NEXT:    movzbl 2(%rdi), %edx
; CHECK64-NEXT:    shll $8, %edx
; CHECK64-NEXT:    orl %ecx, %edx
; CHECK64-NEXT:    movzbl 3(%rdi), %eax
; CHECK64-NEXT:    orl %edx, %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load volatile i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  ; no nsw: the byte's top bit becomes the sign bit
  %tmp3 = shl nuw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}

; There is a store in between individual loads
; (the assertions show the byte loads are kept separate).
; i8* p; i32* q;
; res1 = ((i32) p[0] << 24) | ((i32) p[1] << 16)
; *q = 0;
; res2 = ((i32) p[2] << 8) | (i32) p[3]
; res1 | res2
define i32 @load_i32_by_i8_bswap_store_in_between(i32* %arg, i32* %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_store_in_between:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    .cfi_offset %esi, -8
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movzbl (%ecx), %edx
; CHECK-NEXT:    shll $24, %edx
; CHECK-NEXT:    movzbl 1(%ecx), %esi
; CHECK-NEXT:    movl $0, (%eax)
; CHECK-NEXT:    shll $16, %esi
; CHECK-NEXT:    orl %edx, %esi
; CHECK-NEXT:    movzbl 2(%ecx), %edx
; CHECK-NEXT:    shll $8, %edx
; CHECK-NEXT:    orl %esi, %edx
; CHECK-NEXT:    movzbl 3(%ecx), %eax
; CHECK-NEXT:    orl %edx, %eax
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    .cfi_def_cfa_offset 4
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_store_in_between:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl (%rdi), %eax
; CHECK64-NEXT:    shll $24, %eax
; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
; CHECK64-NEXT:    movl $0, (%rsi)
; CHECK64-NEXT:    shll $16, %ecx
; CHECK64-NEXT:    orl %eax, %ecx
; CHECK64-NEXT:    movzbl 2(%rdi), %edx
; CHECK64-NEXT:    shll $8, %edx
; CHECK64-NEXT:    orl %ecx, %edx
; CHECK64-NEXT:    movzbl 3(%rdi), %eax
; CHECK64-NEXT:    orl %edx, %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp2 = load i8, i8* %tmp, align 1
  %tmp3 = zext i8 %tmp2 to i32
  ; no nsw: the byte's top bit becomes the sign bit
  %tmp4 = shl nuw i32 %tmp3, 24
  %tmp5 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp6 = load i8, i8* %tmp5, align 1
  ; This store will prevent folding of the pattern
  store i32 0, i32* %arg1
  %tmp7 = zext i8 %tmp6 to i32
  %tmp8 = shl nuw nsw i32 %tmp7, 16
  %tmp9 = or i32 %tmp8, %tmp4
  %tmp10 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp11 = load i8, i8* %tmp10, align 1
  %tmp12 = zext i8 %tmp11 to i32
  %tmp13 = shl nuw nsw i32 %tmp12, 8
  %tmp14 = or i32 %tmp9, %tmp13
  %tmp15 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp16 = load i8, i8* %tmp15, align 1
  %tmp17 = zext i8 %tmp16 to i32
  %tmp18 = or i32 %tmp14, %tmp17
  ret i32 %tmp18
}

; One of the loads is from an unrelated location
; (the assertions show the byte loads are kept separate).
; i8* p, q;
; ((i32) p[0] << 24) | ((i32) q[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap_unrelated_load(i32* %arg, i32* %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_unrelated_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movzbl (%ecx), %edx
; CHECK-NEXT:    shll $24, %edx
; CHECK-NEXT:    movzbl 1(%eax), %eax
; CHECK-NEXT:    shll $16, %eax
; CHECK-NEXT:    orl %edx, %eax
; CHECK-NEXT:    movzbl 2(%ecx), %edx
; CHECK-NEXT:    shll $8, %edx
; CHECK-NEXT:    orl %eax, %edx
; CHECK-NEXT:    movzbl 3(%ecx), %eax
; CHECK-NEXT:    orl %edx, %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_unrelated_load:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl (%rdi), %eax
; CHECK64-NEXT:    shll $24, %eax
; CHECK64-NEXT:    movzbl 1(%rsi), %ecx
; CHECK64-NEXT:    shll $16, %ecx
; CHECK64-NEXT:    orl %eax, %ecx
; CHECK64-NEXT:    movzbl 2(%rdi), %edx
; CHECK64-NEXT:    shll $8, %edx
; CHECK64-NEXT:    orl %ecx, %edx
; CHECK64-NEXT:    movzbl 3(%rdi), %eax
; CHECK64-NEXT:    orl %edx, %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp2 = bitcast i32* %arg1 to i8*
  %tmp3 = load i8, i8* %tmp, align 1
  %tmp4 = zext i8 %tmp3 to i32
  ; no nsw: the byte's top bit becomes the sign bit
  %tmp5 = shl nuw i32 %tmp4, 24
  ; Load from an unrelated address
  %tmp6 = getelementptr inbounds i8, i8* %tmp2, i32 1
  %tmp7 = load i8, i8* %tmp6, align 1
  %tmp8 = zext i8 %tmp7 to i32
  %tmp9 = shl nuw nsw i32 %tmp8, 16
  %tmp10 = or i32 %tmp9, %tmp5
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i32
  %tmp14 = shl nuw nsw i32 %tmp13, 8
  %tmp15 = or i32 %tmp10, %tmp14
  %tmp16 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp17 = load i8, i8* %tmp16, align 1
  %tmp18 = zext i8 %tmp17 to i32
  %tmp19 = or i32 %tmp15, %tmp18
  ret i32 %tmp19
}

; Little-endian pattern starting at byte offset 1; expected to become a single
; movl from offset 1.
; i8* p;
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl 1(%eax), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl 1(%rdi), %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  ; no nsw: the byte's top bit becomes the sign bit
  %tmp17 = shl nuw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; Little-endian pattern at negative byte offsets; expected to become a single
; movl from offset -4.
; i8* p;
; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl -4(%eax), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_neg_offset:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl -4(%rdi), %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  ; no nsw: the byte's top bit becomes the sign bit
  %tmp17 = shl nuw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p;
; (i32) p[4] |
; ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
; Big-endian pattern starting at byte offset 1; expected to become a load from
; offset 1 plus bswapl, or a single movbel when +movbe is enabled.
define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
; BSWAP-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; BSWAP:       # %bb.0:
; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT:    movl 1(%eax), %eax
; BSWAP-NEXT:    bswapl %eax
; BSWAP-NEXT:    retl
;
; MOVBE-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; MOVBE:       # %bb.0:
; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT:    movbel 1(%eax), %eax
; MOVBE-NEXT:    retl
;
; BSWAP64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; BSWAP64:       # %bb.0:
; BSWAP64-NEXT:    movl 1(%rdi), %eax
; BSWAP64-NEXT:    bswapl %eax
; BSWAP64-NEXT:    retq
;
; MOVBE64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; MOVBE64:       # %bb.0:
; MOVBE64-NEXT:    movbel 1(%rdi), %eax
; MOVBE64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  ; nuw only: with nsw the shift would be poison whenever the byte is >= 0x80,
  ; because its top bit becomes the i32 sign bit.
  %tmp17 = shl nuw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; Big-endian pattern at negative byte offsets; expected to become a load from
; offset -4 plus bswapl, or a single movbel when +movbe is enabled.
; i8* p;
; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
; BSWAP-LABEL: load_i32_by_i8_neg_offset_bswap:
; BSWAP:       # %bb.0:
; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT:    movl -4(%eax), %eax
; BSWAP-NEXT:    bswapl %eax
; BSWAP-NEXT:    retl
;
; MOVBE-LABEL: load_i32_by_i8_neg_offset_bswap:
; MOVBE:       # %bb.0:
; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT:    movbel -4(%eax), %eax
; MOVBE-NEXT:    retl
;
; BSWAP64-LABEL: load_i32_by_i8_neg_offset_bswap:
; BSWAP64:       # %bb.0:
; BSWAP64-NEXT:    movl -4(%rdi), %eax
; BSWAP64-NEXT:    bswapl %eax
; BSWAP64-NEXT:    retq
;
; MOVBE64-LABEL: load_i32_by_i8_neg_offset_bswap:
; MOVBE64:       # %bb.0:
; MOVBE64-NEXT:    movbel -4(%rdi), %eax
; MOVBE64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  ; no nsw: the byte's top bit becomes the sign bit
  %tmp17 = shl nuw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; Big-endian pattern addressed as base + variable index; expected to become
; one indexed load plus bswapl, or one indexed movbel when +movbe is enabled.
; i8* p; i32 i;
; ((i32) p[i] << 24) | ((i32) p[i + 1] << 16) | ((i32) p[i + 2] << 8) | (i32) p[i + 3]
define i32 @load_i32_by_i8_bswap_base_index_offset(i32* %arg, i32 %arg1) {
; BSWAP-LABEL: load_i32_by_i8_bswap_base_index_offset:
; BSWAP:       # %bb.0:
; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; BSWAP-NEXT:    movl (%ecx,%eax), %eax
; BSWAP-NEXT:    bswapl %eax
; BSWAP-NEXT:    retl
;
; MOVBE-LABEL: load_i32_by_i8_bswap_base_index_offset:
; MOVBE:       # %bb.0:
; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; MOVBE-NEXT:    movbel (%ecx,%eax), %eax
; MOVBE-NEXT:    retl
;
; BSWAP64-LABEL: load_i32_by_i8_bswap_base_index_offset:
; BSWAP64:       # %bb.0:
; BSWAP64-NEXT:    movslq %esi, %rax
; BSWAP64-NEXT:    movl (%rdi,%rax), %eax
; BSWAP64-NEXT:    bswapl %eax
; BSWAP64-NEXT:    retq
;
; MOVBE64-LABEL: load_i32_by_i8_bswap_base_index_offset:
; MOVBE64:       # %bb.0:
; MOVBE64-NEXT:    movslq %esi, %rax
; MOVBE64-NEXT:    movbel (%rdi,%rax), %eax
; MOVBE64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp2 = getelementptr inbounds i8, i8* %tmp, i32 %arg1
  %tmp3 = load i8, i8* %tmp2, align 1
  %tmp4 = zext i8 %tmp3 to i32
  ; no nsw: the byte's top bit becomes the sign bit
  %tmp5 = shl nuw i32 %tmp4, 24
  %tmp6 = add nuw nsw i32 %arg1, 1
  %tmp7 = getelementptr inbounds i8, i8* %tmp, i32 %tmp6
  %tmp8 = load i8, i8* %tmp7, align 1
  %tmp9 = zext i8 %tmp8 to i32
  %tmp10 = shl nuw nsw i32 %tmp9, 16
  %tmp11 = or i32 %tmp10, %tmp5
  %tmp12 = add nuw nsw i32 %arg1, 2
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i32 %tmp12
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i32
  %tmp16 = shl nuw nsw i32 %tmp15, 8
  %tmp17 = or i32 %tmp11, %tmp16
  %tmp18 = add nuw nsw i32 %arg1, 3
  %tmp19 = getelementptr inbounds i8, i8* %tmp, i32 %tmp18
  %tmp20 = load i8, i8* %tmp19, align 1
  %tmp21 = zext i8 %tmp20 to i32
  %tmp22 = or i32 %tmp17, %tmp21
  ret i32 %tmp22
}

; Verify that we don't crash handling shl i32 %conv57, 32
; (both shift amounts below are deliberately >= the i32 bit width).
define void @shift_i32_by_32(i8* %src1, i8* %src2, i64* %dst) {
; CHECK-LABEL: shift_i32_by_32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl $-1, 4(%eax)
; CHECK-NEXT:    movl $-1, (%eax)
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: shift_i32_by_32:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    movq $-1, (%rdx)
; CHECK64-NEXT:    retq
entry:
  %load1 = load i8, i8* %src1, align 1
  %conv46 = zext i8 %load1 to i32
  %shl47 = shl i32 %conv46, 56
  %or55 = or i32 %shl47, 0
  %load2 = load i8, i8* %src2, align 1
  %conv57 = zext i8 %load2 to i32
  %shl58 = shl i32 %conv57, 32
  %or59 = or i32 %or55, %shl58
  %or74 = or i32 %or59, 0
  %conv75 = sext i32 %or74 to i64
  store i64 %conv75, i64* %dst, align 8
  ret void
}

declare i16 @llvm.bswap.i16(i16)

; Two byte-swapped i16 halves combined with the first half on top; expected to
; become a 32-bit load plus bswapl, or a single movbel when +movbe is enabled.
; i16* p;
; (i32) bswap(p[1]) | ((i32) bswap(p[0]) << 16)
define i32 @load_i32_by_bswap_i16(i32* %arg) {
; BSWAP-LABEL: load_i32_by_bswap_i16:
; BSWAP:       # %bb.0:
; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT:    movl (%eax), %eax
; BSWAP-NEXT:    bswapl %eax
; BSWAP-NEXT:    retl
;
; MOVBE-LABEL: load_i32_by_bswap_i16:
; MOVBE:       # %bb.0:
; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT:    movbel (%eax), %eax
; MOVBE-NEXT:    retl
;
; BSWAP64-LABEL: load_i32_by_bswap_i16:
; BSWAP64:       # %bb.0:
; BSWAP64-NEXT:    movl (%rdi), %eax
; BSWAP64-NEXT:    bswapl %eax
; BSWAP64-NEXT:    retq
;
; MOVBE64-LABEL: load_i32_by_bswap_i16:
; MOVBE64:       # %bb.0:
; MOVBE64-NEXT:    movbel (%rdi), %eax
; MOVBE64-NEXT:    retq
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 4
  %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
  %tmp2 = zext i16 %tmp11 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
  %tmp5 = zext i16 %tmp41 to i32
  ; no nsw: the half's top bit becomes the sign bit
  %tmp6 = shl nuw i32 %tmp2, 16
  %tmp7 = or i32 %tmp6, %tmp5
  ret i32 %tmp7
}

; The upper half is sign-extended before being shifted into place; expected to
; become a single movl.
; i16* p;
; (i32) p[0] | ((sext p[1] to i32) << 16)
define i32 @load_i32_by_sext_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_sext_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl (%eax), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_sext_i16:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl (%rdi), %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 1
  %tmp2 = zext i16 %tmp1 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp5 = sext i16 %tmp4 to i32
  ; nsw only: a negative half sign-extends to one-bits in the upper 16 bits,
  ; which this shift discards, so nuw would make the result poison.
  %tmp6 = shl nsw i32 %tmp5, 16
  %tmp7 = or i32 %tmp6, %tmp2
  ret i32 %tmp7
}

; Little-endian pattern addressed as (arg + 12) + zero-extended index; expected
; to become a single movl with displacement 12.
; i8* arg; i32 i;
; p = arg + 12;
; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
define i32 @load_i32_by_i8_base_offset_index(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl 12(%ecx,%eax), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_base_offset_index:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl %esi, %eax
; CHECK64-NEXT:    movl 12(%rdi,%rax), %eax
; CHECK64-NEXT:    retq
  %tmp = add nuw nsw i32 %i, 3
  %tmp2 = add nuw nsw i32 %i, 2
  %tmp3 = add nuw nsw i32 %i, 1
  %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp5 = zext i32 %i to i64
  %tmp6 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp5
  %tmp7 = load i8, i8* %tmp6, align 1
  %tmp8 = zext i8 %tmp7 to i32
  %tmp9 = zext i32 %tmp3 to i64
  %tmp10 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp9
  %tmp11 = load i8, i8* %tmp10, align 1
  %tmp12 = zext i8 %tmp11 to i32
  %tmp13 = shl nuw nsw i32 %tmp12, 8
  %tmp14 = or i32 %tmp13, %tmp8
  %tmp15 = zext i32 %tmp2 to i64
  %tmp16 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp15
  %tmp17 = load i8, i8* %tmp16, align 1
  %tmp18 = zext i8 %tmp17 to i32
  %tmp19 = shl nuw nsw i32 %tmp18, 16
  %tmp20 = or i32 %tmp14, %tmp19
  %tmp21 = zext i32 %tmp to i64
  %tmp22 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp21
  %tmp23 = load i8, i8* %tmp22, align 1
  %tmp24 = zext i8 %tmp23 to i32
  %tmp25 = shl nuw i32 %tmp24, 24
  %tmp26 = or i32 %tmp20, %tmp25
  ret i32 %tmp26
}

; Same addressing with the pattern starting at index i + 1; expected to become
; a single movl with displacement 13.
; i8* arg; i32 i;
; p = arg + 12;
; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl 13(%ecx,%eax), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl %esi, %eax
; CHECK64-NEXT:    movl 13(%rdi,%rax), %eax
; CHECK64-NEXT:    retq
  %tmp = add nuw nsw i32 %i, 4
  %tmp2 = add nuw nsw i32 %i, 3
  %tmp3 = add nuw nsw i32 %i, 2
  %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp5 = add nuw nsw i32 %i, 1
  %tmp27 = zext i32 %tmp5 to i64
  %tmp28 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp27
  %tmp29 = load i8, i8* %tmp28, align 1
  %tmp30 = zext i8 %tmp29 to i32
  %tmp31 = zext i32 %tmp3 to i64
  %tmp32 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp31
  %tmp33 = load i8, i8* %tmp32, align 1
  %tmp34 = zext i8 %tmp33 to i32
  %tmp35 = shl nuw nsw i32 %tmp34, 8
  %tmp36 = or i32 %tmp35, %tmp30
  %tmp37 = zext i32 %tmp2 to i64
  %tmp38 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp37
  %tmp39 = load i8, i8* %tmp38, align 1
  %tmp40 = zext i8 %tmp39 to i32
  %tmp41 = shl nuw nsw i32 %tmp40, 16
  %tmp42 = or i32 %tmp36, %tmp41
  %tmp43 = zext i32 %tmp to i64
  %tmp44 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp43
  %tmp45 = load i8, i8* %tmp44, align 1
  %tmp46 = zext i8 %tmp45 to i32
  %tmp47 = shl nuw i32 %tmp46, 24
  %tmp48 = or i32 %tmp42, %tmp47
  ret i32 %tmp48
}

; i8* arg; i32 i;
;
; p0 = arg;
; p1 = arg + i + 1;
; p2 = arg + i + 2;
; p3 = arg
+ i + 3; 1007 ; 1008 ; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24) 1009 ; 1010 ; This test excercises zero and any extend loads as a part of load combine pattern. 1011 ; In order to fold the pattern above we need to reassociate the address computation 1012 ; first. By the time the address computation is reassociated loads are combined to 1013 ; to zext and aext loads. 1014 define i32 @load_i32_by_i8_zaext_loads(i8* %arg, i32 %arg1) { 1015 ; CHECK-LABEL: load_i32_by_i8_zaext_loads: 1016 ; CHECK: # %bb.0: 1017 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 1018 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx 1019 ; CHECK-NEXT: movl 12(%ecx,%eax), %eax 1020 ; CHECK-NEXT: retl 1021 ; 1022 ; CHECK64-LABEL: load_i32_by_i8_zaext_loads: 1023 ; CHECK64: # %bb.0: 1024 ; CHECK64-NEXT: movl %esi, %eax 1025 ; CHECK64-NEXT: movl 12(%rdi,%rax), %eax 1026 ; CHECK64-NEXT: retq 1027 %tmp = add nuw nsw i32 %arg1, 3 1028 %tmp2 = add nuw nsw i32 %arg1, 2 1029 %tmp3 = add nuw nsw i32 %arg1, 1 1030 %tmp4 = zext i32 %tmp to i64 1031 %tmp5 = zext i32 %tmp2 to i64 1032 %tmp6 = zext i32 %tmp3 to i64 1033 %tmp24 = getelementptr inbounds i8, i8* %arg, i64 %tmp4 1034 %tmp30 = getelementptr inbounds i8, i8* %arg, i64 %tmp5 1035 %tmp31 = getelementptr inbounds i8, i8* %arg, i64 %tmp6 1036 %tmp32 = getelementptr inbounds i8, i8* %arg, i64 12 1037 %tmp33 = zext i32 %arg1 to i64 1038 %tmp34 = getelementptr inbounds i8, i8* %tmp32, i64 %tmp33 1039 %tmp35 = load i8, i8* %tmp34, align 1 1040 %tmp36 = zext i8 %tmp35 to i32 1041 %tmp37 = getelementptr inbounds i8, i8* %tmp31, i64 12 1042 %tmp38 = load i8, i8* %tmp37, align 1 1043 %tmp39 = zext i8 %tmp38 to i32 1044 %tmp40 = shl nuw nsw i32 %tmp39, 8 1045 %tmp41 = or i32 %tmp40, %tmp36 1046 %tmp42 = getelementptr inbounds i8, i8* %tmp30, i64 12 1047 %tmp43 = load i8, i8* %tmp42, align 1 1048 %tmp44 = zext i8 %tmp43 to i32 1049 %tmp45 = shl nuw nsw i32 %tmp44, 16 1050 %tmp46 = or i32 %tmp41, %tmp45 1051 %tmp47 = getelementptr 
inbounds i8, i8* %tmp24, i64 12 1052 %tmp48 = load i8, i8* %tmp47, align 1 1053 %tmp49 = zext i8 %tmp48 to i32 1054 %tmp50 = shl nuw i32 %tmp49, 24 1055 %tmp51 = or i32 %tmp46, %tmp50 1056 ret i32 %tmp51 1057 } 1058 1059 ; The same as load_i32_by_i8_zaext_loads but the last load is combined to 1060 ; a sext load. 1061 ; 1062 ; i8* arg; i32 i; 1063 ; 1064 ; p0 = arg; 1065 ; p1 = arg + i + 1; 1066 ; p2 = arg + i + 2; 1067 ; p3 = arg + i + 3; 1068 ; 1069 ; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24) 1070 define i32 @load_i32_by_i8_zsext_loads(i8* %arg, i32 %arg1) { 1071 ; CHECK-LABEL: load_i32_by_i8_zsext_loads: 1072 ; CHECK: # %bb.0: 1073 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 1074 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx 1075 ; CHECK-NEXT: movl 12(%ecx,%eax), %eax 1076 ; CHECK-NEXT: retl 1077 ; 1078 ; CHECK64-LABEL: load_i32_by_i8_zsext_loads: 1079 ; CHECK64: # %bb.0: 1080 ; CHECK64-NEXT: movl %esi, %eax 1081 ; CHECK64-NEXT: movl 12(%rdi,%rax), %eax 1082 ; CHECK64-NEXT: retq 1083 %tmp = add nuw nsw i32 %arg1, 3 1084 %tmp2 = add nuw nsw i32 %arg1, 2 1085 %tmp3 = add nuw nsw i32 %arg1, 1 1086 %tmp4 = zext i32 %tmp to i64 1087 %tmp5 = zext i32 %tmp2 to i64 1088 %tmp6 = zext i32 %tmp3 to i64 1089 %tmp24 = getelementptr inbounds i8, i8* %arg, i64 %tmp4 1090 %tmp30 = getelementptr inbounds i8, i8* %arg, i64 %tmp5 1091 %tmp31 = getelementptr inbounds i8, i8* %arg, i64 %tmp6 1092 %tmp32 = getelementptr inbounds i8, i8* %arg, i64 12 1093 %tmp33 = zext i32 %arg1 to i64 1094 %tmp34 = getelementptr inbounds i8, i8* %tmp32, i64 %tmp33 1095 %tmp35 = load i8, i8* %tmp34, align 1 1096 %tmp36 = zext i8 %tmp35 to i32 1097 %tmp37 = getelementptr inbounds i8, i8* %tmp31, i64 12 1098 %tmp38 = load i8, i8* %tmp37, align 1 1099 %tmp39 = zext i8 %tmp38 to i32 1100 %tmp40 = shl nuw nsw i32 %tmp39, 8 1101 %tmp41 = or i32 %tmp40, %tmp36 1102 %tmp42 = getelementptr inbounds i8, i8* %tmp30, i64 12 1103 %tmp43 = load i8, i8* %tmp42, align 1 1104 %tmp44 = 
zext i8 %tmp43 to i32 1105 %tmp45 = shl nuw nsw i32 %tmp44, 16 1106 %tmp46 = or i32 %tmp41, %tmp45 1107 %tmp47 = getelementptr inbounds i8, i8* %tmp24, i64 12 1108 %tmp48 = load i8, i8* %tmp47, align 1 1109 %tmp49 = sext i8 %tmp48 to i16 1110 %tmp50 = zext i16 %tmp49 to i32 1111 %tmp51 = shl nuw i32 %tmp50, 24 1112 %tmp52 = or i32 %tmp46, %tmp51 1113 ret i32 %tmp52 1114 } 1115 1116 ; i8* p; 1117 ; (i32) p[0] | ((i32) p[1] << 8) 1118 define i32 @zext_load_i32_by_i8(i32* %arg) { 1119 ; CHECK-LABEL: zext_load_i32_by_i8: 1120 ; CHECK: # %bb.0: 1121 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 1122 ; CHECK-NEXT: movzbl (%eax), %ecx 1123 ; CHECK-NEXT: movzbl 1(%eax), %eax 1124 ; CHECK-NEXT: shll $8, %eax 1125 ; CHECK-NEXT: orl %ecx, %eax 1126 ; CHECK-NEXT: retl 1127 ; 1128 ; CHECK64-LABEL: zext_load_i32_by_i8: 1129 ; CHECK64: # %bb.0: 1130 ; CHECK64-NEXT: movzbl (%rdi), %ecx 1131 ; CHECK64-NEXT: movzbl 1(%rdi), %eax 1132 ; CHECK64-NEXT: shll $8, %eax 1133 ; CHECK64-NEXT: orl %ecx, %eax 1134 ; CHECK64-NEXT: retq 1135 %tmp = bitcast i32* %arg to i8* 1136 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 1137 %tmp2 = load i8, i8* %tmp1, align 1 1138 %tmp3 = zext i8 %tmp2 to i32 1139 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 1140 %tmp5 = load i8, i8* %tmp4, align 1 1141 %tmp6 = zext i8 %tmp5 to i32 1142 %tmp7 = shl nuw nsw i32 %tmp6, 8 1143 %tmp8 = or i32 %tmp7, %tmp3 1144 ret i32 %tmp8 1145 } 1146 1147 ; i8* p; 1148 ; ((i32) p[0] << 8) | ((i32) p[1] << 16) 1149 define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) { 1150 ; CHECK-LABEL: zext_load_i32_by_i8_shl_8: 1151 ; CHECK: # %bb.0: 1152 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 1153 ; CHECK-NEXT: movzbl (%eax), %ecx 1154 ; CHECK-NEXT: shll $8, %ecx 1155 ; CHECK-NEXT: movzbl 1(%eax), %eax 1156 ; CHECK-NEXT: shll $16, %eax 1157 ; CHECK-NEXT: orl %ecx, %eax 1158 ; CHECK-NEXT: retl 1159 ; 1160 ; CHECK64-LABEL: zext_load_i32_by_i8_shl_8: 1161 ; CHECK64: # %bb.0: 1162 ; CHECK64-NEXT: movzbl (%rdi), %ecx 1163 ; CHECK64-NEXT: 
shll $8, %ecx 1164 ; CHECK64-NEXT: movzbl 1(%rdi), %eax 1165 ; CHECK64-NEXT: shll $16, %eax 1166 ; CHECK64-NEXT: orl %ecx, %eax 1167 ; CHECK64-NEXT: retq 1168 %tmp = bitcast i32* %arg to i8* 1169 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 1170 %tmp2 = load i8, i8* %tmp1, align 1 1171 %tmp3 = zext i8 %tmp2 to i32 1172 %tmp30 = shl nuw nsw i32 %tmp3, 8 1173 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 1174 %tmp5 = load i8, i8* %tmp4, align 1 1175 %tmp6 = zext i8 %tmp5 to i32 1176 %tmp7 = shl nuw nsw i32 %tmp6, 16 1177 %tmp8 = or i32 %tmp7, %tmp30 1178 ret i32 %tmp8 1179 } 1180 1181 ; i8* p; 1182 ; ((i32) p[0] << 16) | ((i32) p[1] << 24) 1183 define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) { 1184 ; CHECK-LABEL: zext_load_i32_by_i8_shl_16: 1185 ; CHECK: # %bb.0: 1186 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 1187 ; CHECK-NEXT: movzbl (%eax), %ecx 1188 ; CHECK-NEXT: shll $16, %ecx 1189 ; CHECK-NEXT: movzbl 1(%eax), %eax 1190 ; CHECK-NEXT: shll $24, %eax 1191 ; CHECK-NEXT: orl %ecx, %eax 1192 ; CHECK-NEXT: retl 1193 ; 1194 ; CHECK64-LABEL: zext_load_i32_by_i8_shl_16: 1195 ; CHECK64: # %bb.0: 1196 ; CHECK64-NEXT: movzbl (%rdi), %ecx 1197 ; CHECK64-NEXT: shll $16, %ecx 1198 ; CHECK64-NEXT: movzbl 1(%rdi), %eax 1199 ; CHECK64-NEXT: shll $24, %eax 1200 ; CHECK64-NEXT: orl %ecx, %eax 1201 ; CHECK64-NEXT: retq 1202 %tmp = bitcast i32* %arg to i8* 1203 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 1204 %tmp2 = load i8, i8* %tmp1, align 1 1205 %tmp3 = zext i8 %tmp2 to i32 1206 %tmp30 = shl nuw nsw i32 %tmp3, 16 1207 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1 1208 %tmp5 = load i8, i8* %tmp4, align 1 1209 %tmp6 = zext i8 %tmp5 to i32 1210 %tmp7 = shl nuw nsw i32 %tmp6, 24 1211 %tmp8 = or i32 %tmp7, %tmp30 1212 ret i32 %tmp8 1213 } 1214 1215 ; i8* p; 1216 ; (i32) p[1] | ((i32) p[0] << 8) 1217 define i32 @zext_load_i32_by_i8_bswap(i32* %arg) { 1218 ; CHECK-LABEL: zext_load_i32_by_i8_bswap: 1219 ; CHECK: # %bb.0: 1220 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), 
%eax 1221 ; CHECK-NEXT: movzbl 1(%eax), %ecx 1222 ; CHECK-NEXT: movzbl (%eax), %eax 1223 ; CHECK-NEXT: shll $8, %eax 1224 ; CHECK-NEXT: orl %ecx, %eax 1225 ; CHECK-NEXT: retl 1226 ; 1227 ; CHECK64-LABEL: zext_load_i32_by_i8_bswap: 1228 ; CHECK64: # %bb.0: 1229 ; CHECK64-NEXT: movzbl 1(%rdi), %ecx 1230 ; CHECK64-NEXT: movzbl (%rdi), %eax 1231 ; CHECK64-NEXT: shll $8, %eax 1232 ; CHECK64-NEXT: orl %ecx, %eax 1233 ; CHECK64-NEXT: retq 1234 %tmp = bitcast i32* %arg to i8* 1235 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 1236 %tmp2 = load i8, i8* %tmp1, align 1 1237 %tmp3 = zext i8 %tmp2 to i32 1238 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0 1239 %tmp5 = load i8, i8* %tmp4, align 1 1240 %tmp6 = zext i8 %tmp5 to i32 1241 %tmp7 = shl nuw nsw i32 %tmp6, 8 1242 %tmp8 = or i32 %tmp7, %tmp3 1243 ret i32 %tmp8 1244 } 1245 1246 ; i8* p; 1247 ; ((i32) p[1] << 8) | ((i32) p[0] << 16) 1248 define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) { 1249 ; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8: 1250 ; CHECK: # %bb.0: 1251 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 1252 ; CHECK-NEXT: movzbl 1(%eax), %ecx 1253 ; CHECK-NEXT: shll $8, %ecx 1254 ; CHECK-NEXT: movzbl (%eax), %eax 1255 ; CHECK-NEXT: shll $16, %eax 1256 ; CHECK-NEXT: orl %ecx, %eax 1257 ; CHECK-NEXT: retl 1258 ; 1259 ; CHECK64-LABEL: zext_load_i32_by_i8_bswap_shl_8: 1260 ; CHECK64: # %bb.0: 1261 ; CHECK64-NEXT: movzbl 1(%rdi), %ecx 1262 ; CHECK64-NEXT: shll $8, %ecx 1263 ; CHECK64-NEXT: movzbl (%rdi), %eax 1264 ; CHECK64-NEXT: shll $16, %eax 1265 ; CHECK64-NEXT: orl %ecx, %eax 1266 ; CHECK64-NEXT: retq 1267 %tmp = bitcast i32* %arg to i8* 1268 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 1269 %tmp2 = load i8, i8* %tmp1, align 1 1270 %tmp3 = zext i8 %tmp2 to i32 1271 %tmp30 = shl nuw nsw i32 %tmp3, 8 1272 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0 1273 %tmp5 = load i8, i8* %tmp4, align 1 1274 %tmp6 = zext i8 %tmp5 to i32 1275 %tmp7 = shl nuw nsw i32 %tmp6, 16 1276 %tmp8 = or i32 %tmp7, %tmp30 
1277 ret i32 %tmp8 1278 } 1279 1280 ; i8* p; 1281 ; ((i32) p[1] << 16) | ((i32) p[0] << 24) 1282 define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) { 1283 ; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16: 1284 ; CHECK: # %bb.0: 1285 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 1286 ; CHECK-NEXT: movzbl 1(%eax), %ecx 1287 ; CHECK-NEXT: shll $16, %ecx 1288 ; CHECK-NEXT: movzbl (%eax), %eax 1289 ; CHECK-NEXT: shll $24, %eax 1290 ; CHECK-NEXT: orl %ecx, %eax 1291 ; CHECK-NEXT: retl 1292 ; 1293 ; CHECK64-LABEL: zext_load_i32_by_i8_bswap_shl_16: 1294 ; CHECK64: # %bb.0: 1295 ; CHECK64-NEXT: movzbl 1(%rdi), %ecx 1296 ; CHECK64-NEXT: shll $16, %ecx 1297 ; CHECK64-NEXT: movzbl (%rdi), %eax 1298 ; CHECK64-NEXT: shll $24, %eax 1299 ; CHECK64-NEXT: orl %ecx, %eax 1300 ; CHECK64-NEXT: retq 1301 %tmp = bitcast i32* %arg to i8* 1302 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 1303 %tmp2 = load i8, i8* %tmp1, align 1 1304 %tmp3 = zext i8 %tmp2 to i32 1305 %tmp30 = shl nuw nsw i32 %tmp3, 16 1306 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0 1307 %tmp5 = load i8, i8* %tmp4, align 1 1308 %tmp6 = zext i8 %tmp5 to i32 1309 %tmp7 = shl nuw nsw i32 %tmp6, 24 1310 %tmp8 = or i32 %tmp7, %tmp30 1311 ret i32 %tmp8 1312 } 1313