; LLVM regression test (CodeGen/X86): folding of multiple narrow adjacent
; loads combined with shl/or into a single wide load (plus bswap/movbe
; when the bytes are assembled in big-endian order).
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=BSWAP
; RUN: llc < %s -mtriple=i686-unknown -mattr=+movbe | FileCheck %s --check-prefix=CHECK --check-prefix=MOVBE
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK64 --check-prefix=BSWAP64
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+movbe | FileCheck %s --check-prefix=CHECK64 --check-prefix=MOVBE64
      6 
      7 ; i8* p;
      8 ; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
; Four consecutive i8 loads OR'd together in little-endian byte order must
; fold to one plain 32-bit load (x86 is little-endian, so no byte swap).
define i32 @load_i32_by_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl (%eax), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl (%rdi), %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 8
  %tmp7 = or i32 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i32
  %tmp11 = shl nuw nsw i32 %tmp10, 16
  %tmp12 = or i32 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i32
  %tmp16 = shl nuw nsw i32 %tmp15, 24
  %tmp17 = or i32 %tmp12, %tmp16
  ret i32 %tmp17
}
     40 
     41 ; i8* p;
     42 ; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
; Same four bytes assembled in big-endian order: must fold to a single
; 32-bit load followed by bswap (or one movbe when +movbe is available).
define i32 @load_i32_by_i8_bswap(i32* %arg) {
; BSWAP-LABEL: load_i32_by_i8_bswap:
; BSWAP:       # %bb.0:
; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT:    movl (%eax), %eax
; BSWAP-NEXT:    bswapl %eax
; BSWAP-NEXT:    retl
;
; MOVBE-LABEL: load_i32_by_i8_bswap:
; MOVBE:       # %bb.0:
; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT:    movbel (%eax), %eax
; MOVBE-NEXT:    retl
;
; BSWAP64-LABEL: load_i32_by_i8_bswap:
; BSWAP64:       # %bb.0:
; BSWAP64-NEXT:    movl (%rdi), %eax
; BSWAP64-NEXT:    bswapl %eax
; BSWAP64-NEXT:    retq
;
; MOVBE64-LABEL: load_i32_by_i8_bswap:
; MOVBE64:       # %bb.0:
; MOVBE64-NEXT:    movbel (%rdi), %eax
; MOVBE64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}
     87 
     88 ; i16* p;
     89 ; (i32) p[0] | ((i32) p[1] << 16)
; Two adjacent i16 loads combined little-endian must fold to one i32 load.
define i32 @load_i32_by_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl (%eax), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i16:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl (%rdi), %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 1
  %tmp2 = zext i16 %tmp1 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp5 = zext i16 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 16
  %tmp7 = or i32 %tmp6, %tmp2
  ret i32 %tmp7
}
    111 
; i16* p_16;
; i8* p_8 = (i8*) p_16;
; (i32) p_16[0] | ((i32) p_8[2] << 16) | ((i32) p_8[3] << 24)
; Mixed widths: one i16 load (bytes 0-1) plus two i8 loads (bytes 2, 3),
; still covering 4 contiguous little-endian bytes — folds to one i32 load.
define i32 @load_i32_by_i16_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl (%eax), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i16_i8:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl (%rdi), %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = bitcast i32* %arg to i8*
  %tmp2 = load i16, i16* %tmp, align 1
  %tmp3 = zext i16 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp1, i32 2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = getelementptr inbounds i8, i8* %tmp1, i32 3
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i32
  %tmp11 = shl nuw nsw i32 %tmp10, 24
  %tmp12 = or i32 %tmp7, %tmp11
  %tmp13 = or i32 %tmp12, %tmp3
  ret i32 %tmp13
}
    142 
    143 
; i8* p;
; (i32) ((i16) p[0] | ((i16) p[1] << 8)) | ((i32) ((i16) p[2] | ((i16) p[3] << 8)) << 16)
; Two i16 halves, each itself assembled from two little-endian i8 loads,
; then combined little-endian — the whole tree folds to one i32 load.
define i32 @load_i32_by_i16_by_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_by_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl (%eax), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i16_by_i8:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl (%rdi), %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i16
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i16
  %tmp6 = shl nuw nsw i16 %tmp5, 8
  %tmp7 = or i16 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i16
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i16
  %tmp14 = shl nuw nsw i16 %tmp13, 8
  %tmp15 = or i16 %tmp14, %tmp10
  %tmp16 = zext i16 %tmp7 to i32
  %tmp17 = zext i16 %tmp15 to i32
  %tmp18 = shl nuw nsw i32 %tmp17, 16
  %tmp19 = or i32 %tmp18, %tmp16
  ret i32 %tmp19
}
    179 
; i8* p;
; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[2] << 8) | (i16) p[3])
; Nested variant in big-endian order: each i16 half is (p[k] << 8) | p[k+1];
; the whole tree must fold to one i32 load plus bswap (movbe with +movbe).
define i32 @load_i32_by_i16_by_i8_bswap(i32* %arg) {
; BSWAP-LABEL: load_i32_by_i16_by_i8_bswap:
; BSWAP:       # %bb.0:
; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT:    movl (%eax), %eax
; BSWAP-NEXT:    bswapl %eax
; BSWAP-NEXT:    retl
;
; MOVBE-LABEL: load_i32_by_i16_by_i8_bswap:
; MOVBE:       # %bb.0:
; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT:    movbel (%eax), %eax
; MOVBE-NEXT:    retl
;
; BSWAP64-LABEL: load_i32_by_i16_by_i8_bswap:
; BSWAP64:       # %bb.0:
; BSWAP64-NEXT:    movl (%rdi), %eax
; BSWAP64-NEXT:    bswapl %eax
; BSWAP64-NEXT:    retq
;
; MOVBE64-LABEL: load_i32_by_i16_by_i8_bswap:
; MOVBE64:       # %bb.0:
; MOVBE64-NEXT:    movbel (%rdi), %eax
; MOVBE64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i16
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i16
  %tmp6 = shl nuw nsw i16 %tmp2, 8
  %tmp7 = or i16 %tmp6, %tmp5
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i16
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i16
  %tmp14 = shl nuw nsw i16 %tmp10, 8
  %tmp15 = or i16 %tmp14, %tmp13
  %tmp16 = zext i16 %tmp7 to i32
  %tmp17 = zext i16 %tmp15 to i32
  %tmp18 = shl nuw nsw i32 %tmp16, 16
  %tmp19 = or i32 %tmp18, %tmp17
  ret i32 %tmp19
}
    228 
    229 ; i8* p;
    230 ; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
; Eight consecutive i8 loads in little-endian order: folds to a single
; 64-bit load on x86-64, and to two 32-bit loads on 32-bit x86.
define i64 @load_i64_by_i8(i64* %arg) {
; CHECK-LABEL: load_i64_by_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl (%ecx), %eax
; CHECK-NEXT:    movl 4(%ecx), %edx
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i64_by_i8:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movq (%rdi), %rax
; CHECK64-NEXT:    retq
  %tmp = bitcast i64* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i64 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i64
  %tmp6 = shl nuw nsw i64 %tmp5, 8
  %tmp7 = or i64 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i64 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i64
  %tmp11 = shl nuw nsw i64 %tmp10, 16
  %tmp12 = or i64 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i64 3
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i64
  %tmp16 = shl nuw nsw i64 %tmp15, 24
  %tmp17 = or i64 %tmp12, %tmp16
  %tmp18 = getelementptr inbounds i8, i8* %tmp, i64 4
  %tmp19 = load i8, i8* %tmp18, align 1
  %tmp20 = zext i8 %tmp19 to i64
  %tmp21 = shl nuw nsw i64 %tmp20, 32
  %tmp22 = or i64 %tmp17, %tmp21
  %tmp23 = getelementptr inbounds i8, i8* %tmp, i64 5
  %tmp24 = load i8, i8* %tmp23, align 1
  %tmp25 = zext i8 %tmp24 to i64
  %tmp26 = shl nuw nsw i64 %tmp25, 40
  %tmp27 = or i64 %tmp22, %tmp26
  %tmp28 = getelementptr inbounds i8, i8* %tmp, i64 6
  %tmp29 = load i8, i8* %tmp28, align 1
  %tmp30 = zext i8 %tmp29 to i64
  %tmp31 = shl nuw nsw i64 %tmp30, 48
  %tmp32 = or i64 %tmp27, %tmp31
  %tmp33 = getelementptr inbounds i8, i8* %tmp, i64 7
  %tmp34 = load i8, i8* %tmp33, align 1
  %tmp35 = zext i8 %tmp34 to i64
  %tmp36 = shl nuw i64 %tmp35, 56
  %tmp37 = or i64 %tmp32, %tmp36
  ret i64 %tmp37
}
    283 
    284 ; i8* p;
    285 ; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
; Eight i8 loads assembled big-endian: folds to one 64-bit load + bswapq
; (movbeq with +movbe) on x86-64, or two 32-bit loads + bswapl on i686.
define i64 @load_i64_by_i8_bswap(i64* %arg) {
; BSWAP-LABEL: load_i64_by_i8_bswap:
; BSWAP:       # %bb.0:
; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT:    movl (%eax), %edx
; BSWAP-NEXT:    movl 4(%eax), %eax
; BSWAP-NEXT:    bswapl %eax
; BSWAP-NEXT:    bswapl %edx
; BSWAP-NEXT:    retl
;
; MOVBE-LABEL: load_i64_by_i8_bswap:
; MOVBE:       # %bb.0:
; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; MOVBE-NEXT:    movbel 4(%ecx), %eax
; MOVBE-NEXT:    movbel (%ecx), %edx
; MOVBE-NEXT:    retl
;
; BSWAP64-LABEL: load_i64_by_i8_bswap:
; BSWAP64:       # %bb.0:
; BSWAP64-NEXT:    movq (%rdi), %rax
; BSWAP64-NEXT:    bswapq %rax
; BSWAP64-NEXT:    retq
;
; MOVBE64-LABEL: load_i64_by_i8_bswap:
; MOVBE64:       # %bb.0:
; MOVBE64-NEXT:    movbeq (%rdi), %rax
; MOVBE64-NEXT:    retq
  %tmp = bitcast i64* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = shl nuw i64 %tmp2, 56
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i64 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i64
  %tmp7 = shl nuw nsw i64 %tmp6, 48
  %tmp8 = or i64 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i64 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i64
  %tmp12 = shl nuw nsw i64 %tmp11, 40
  %tmp13 = or i64 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i64 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i64
  %tmp17 = shl nuw nsw i64 %tmp16, 32
  %tmp18 = or i64 %tmp13, %tmp17
  %tmp19 = getelementptr inbounds i8, i8* %tmp, i64 4
  %tmp20 = load i8, i8* %tmp19, align 1
  %tmp21 = zext i8 %tmp20 to i64
  %tmp22 = shl nuw nsw i64 %tmp21, 24
  %tmp23 = or i64 %tmp18, %tmp22
  %tmp24 = getelementptr inbounds i8, i8* %tmp, i64 5
  %tmp25 = load i8, i8* %tmp24, align 1
  %tmp26 = zext i8 %tmp25 to i64
  %tmp27 = shl nuw nsw i64 %tmp26, 16
  %tmp28 = or i64 %tmp23, %tmp27
  %tmp29 = getelementptr inbounds i8, i8* %tmp, i64 6
  %tmp30 = load i8, i8* %tmp29, align 1
  %tmp31 = zext i8 %tmp30 to i64
  %tmp32 = shl nuw nsw i64 %tmp31, 8
  %tmp33 = or i64 %tmp28, %tmp32
  %tmp34 = getelementptr inbounds i8, i8* %tmp, i64 7
  %tmp35 = load i8, i8* %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i64
  %tmp37 = or i64 %tmp33, %tmp36
  ret i64 %tmp37
}
    353 
    354 ; Part of the load by bytes pattern is used outside of the pattern
    355 ; i8* p;
    356 ; i32 x = (i32) p[1]
    357 ; res = ((i32) p[0] << 24) | (x << 16) | ((i32) p[2] << 8) | (i32) p[3]
    358 ; x | res
; Negative test: one partial value of the pattern (%tmp6) has a use outside
; the or-tree, so the combine must NOT fire — llc keeps the byte loads.
define i32 @load_i32_by_i8_bswap_uses(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap_uses:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    .cfi_offset %esi, -8
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movzbl (%eax), %ecx
; CHECK-NEXT:    shll $24, %ecx
; CHECK-NEXT:    movzbl 1(%eax), %edx
; CHECK-NEXT:    movl %edx, %esi
; CHECK-NEXT:    shll $16, %esi
; CHECK-NEXT:    orl %ecx, %esi
; CHECK-NEXT:    movzbl 2(%eax), %ecx
; CHECK-NEXT:    shll $8, %ecx
; CHECK-NEXT:    orl %esi, %ecx
; CHECK-NEXT:    movzbl 3(%eax), %eax
; CHECK-NEXT:    orl %ecx, %eax
; CHECK-NEXT:    orl %edx, %eax
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    .cfi_def_cfa_offset 4
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_uses:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl (%rdi), %eax
; CHECK64-NEXT:    shll $24, %eax
; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
; CHECK64-NEXT:    movl %ecx, %edx
; CHECK64-NEXT:    shll $16, %edx
; CHECK64-NEXT:    orl %eax, %edx
; CHECK64-NEXT:    movzbl 2(%rdi), %esi
; CHECK64-NEXT:    shll $8, %esi
; CHECK64-NEXT:    orl %edx, %esi
; CHECK64-NEXT:    movzbl 3(%rdi), %eax
; CHECK64-NEXT:    orl %esi, %eax
; CHECK64-NEXT:    orl %ecx, %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ; Use individual part of the pattern outside of the pattern
  %tmp18 = or i32 %tmp6, %tmp17
  ret i32 %tmp18
}
    419 
    420 ; One of the loads is volatile
    421 ; i8* p;
    422 ; p0 = volatile *p;
    423 ; ((i32) p0 << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
; Negative test: the first byte load is volatile, so the loads must not be
; merged — llc emits the four individual movzbl/shll/orl operations.
define i32 @load_i32_by_i8_bswap_volatile(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap_volatile:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movzbl (%eax), %ecx
; CHECK-NEXT:    shll $24, %ecx
; CHECK-NEXT:    movzbl 1(%eax), %edx
; CHECK-NEXT:    shll $16, %edx
; CHECK-NEXT:    orl %ecx, %edx
; CHECK-NEXT:    movzbl 2(%eax), %ecx
; CHECK-NEXT:    shll $8, %ecx
; CHECK-NEXT:    orl %edx, %ecx
; CHECK-NEXT:    movzbl 3(%eax), %eax
; CHECK-NEXT:    orl %ecx, %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_volatile:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl (%rdi), %eax
; CHECK64-NEXT:    shll $24, %eax
; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
; CHECK64-NEXT:    shll $16, %ecx
; CHECK64-NEXT:    orl %eax, %ecx
; CHECK64-NEXT:    movzbl 2(%rdi), %edx
; CHECK64-NEXT:    shll $8, %edx
; CHECK64-NEXT:    orl %ecx, %edx
; CHECK64-NEXT:    movzbl 3(%rdi), %eax
; CHECK64-NEXT:    orl %edx, %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load volatile i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}
    473 
    474 ; There is a store in between individual loads
    475 ; i8* p, q;
    476 ; res1 = ((i32) p[0] << 24) | ((i32) p[1] << 16)
    477 ; *q = 0;
    478 ; res2 = ((i32) p[2] << 8) | (i32) p[3]
    479 ; res1 | res2
; Negative test: a (possibly aliasing) store sits between the byte loads,
; so merging them would be unsound — llc keeps the individual loads.
define i32 @load_i32_by_i8_bswap_store_in_between(i32* %arg, i32* %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_store_in_between:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    .cfi_offset %esi, -8
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movzbl (%ecx), %edx
; CHECK-NEXT:    shll $24, %edx
; CHECK-NEXT:    movzbl 1(%ecx), %esi
; CHECK-NEXT:    movl $0, (%eax)
; CHECK-NEXT:    shll $16, %esi
; CHECK-NEXT:    orl %edx, %esi
; CHECK-NEXT:    movzbl 2(%ecx), %edx
; CHECK-NEXT:    shll $8, %edx
; CHECK-NEXT:    orl %esi, %edx
; CHECK-NEXT:    movzbl 3(%ecx), %eax
; CHECK-NEXT:    orl %edx, %eax
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    .cfi_def_cfa_offset 4
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_store_in_between:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl (%rdi), %eax
; CHECK64-NEXT:    shll $24, %eax
; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
; CHECK64-NEXT:    movl $0, (%rsi)
; CHECK64-NEXT:    shll $16, %ecx
; CHECK64-NEXT:    orl %eax, %ecx
; CHECK64-NEXT:    movzbl 2(%rdi), %edx
; CHECK64-NEXT:    shll $8, %edx
; CHECK64-NEXT:    orl %ecx, %edx
; CHECK64-NEXT:    movzbl 3(%rdi), %eax
; CHECK64-NEXT:    orl %edx, %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp2 = load i8, i8* %tmp, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = shl nuw nsw i32 %tmp3, 24
  %tmp5 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp6 = load i8, i8* %tmp5, align 1
  ; This store will prevent folding of the pattern
  store i32 0, i32* %arg1
  %tmp7 = zext i8 %tmp6 to i32
  %tmp8 = shl nuw nsw i32 %tmp7, 16
  %tmp9 = or i32 %tmp8, %tmp4
  %tmp10 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp11 = load i8, i8* %tmp10, align 1
  %tmp12 = zext i8 %tmp11 to i32
  %tmp13 = shl nuw nsw i32 %tmp12, 8
  %tmp14 = or i32 %tmp9, %tmp13
  %tmp15 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp16 = load i8, i8* %tmp15, align 1
  %tmp17 = zext i8 %tmp16 to i32
  %tmp18 = or i32 %tmp14, %tmp17
  ret i32 %tmp18
}
    539 
    540 ; One of the loads is from an unrelated location
    541 ; i8* p, q;
    542 ; ((i32) p[0] << 24) | ((i32) q[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
; Negative test: the second byte comes from a different base pointer
; (%arg1), so the loads are not contiguous and must not be merged.
define i32 @load_i32_by_i8_bswap_unrelated_load(i32* %arg, i32* %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_unrelated_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movzbl (%ecx), %edx
; CHECK-NEXT:    shll $24, %edx
; CHECK-NEXT:    movzbl 1(%eax), %eax
; CHECK-NEXT:    shll $16, %eax
; CHECK-NEXT:    orl %edx, %eax
; CHECK-NEXT:    movzbl 2(%ecx), %edx
; CHECK-NEXT:    shll $8, %edx
; CHECK-NEXT:    orl %eax, %edx
; CHECK-NEXT:    movzbl 3(%ecx), %eax
; CHECK-NEXT:    orl %edx, %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_unrelated_load:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl (%rdi), %eax
; CHECK64-NEXT:    shll $24, %eax
; CHECK64-NEXT:    movzbl 1(%rsi), %ecx
; CHECK64-NEXT:    shll $16, %ecx
; CHECK64-NEXT:    orl %eax, %ecx
; CHECK64-NEXT:    movzbl 2(%rdi), %edx
; CHECK64-NEXT:    shll $8, %edx
; CHECK64-NEXT:    orl %ecx, %edx
; CHECK64-NEXT:    movzbl 3(%rdi), %eax
; CHECK64-NEXT:    orl %edx, %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp2 = bitcast i32* %arg1 to i8*
  %tmp3 = load i8, i8* %tmp, align 1
  %tmp4 = zext i8 %tmp3 to i32
  %tmp5 = shl nuw nsw i32 %tmp4, 24
  ; Load from an unrelated address
  %tmp6 = getelementptr inbounds i8, i8* %tmp2, i32 1
  %tmp7 = load i8, i8* %tmp6, align 1
  %tmp8 = zext i8 %tmp7 to i32
  %tmp9 = shl nuw nsw i32 %tmp8, 16
  %tmp10 = or i32 %tmp9, %tmp5
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i32
  %tmp14 = shl nuw nsw i32 %tmp13, 8
  %tmp15 = or i32 %tmp10, %tmp14
  %tmp16 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp17 = load i8, i8* %tmp16, align 1
  %tmp18 = zext i8 %tmp17 to i32
  %tmp19 = or i32 %tmp15, %tmp18
  ret i32 %tmp19
}
    595 
    596 ; i8* p;
    597 ; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
; Little-endian byte combine starting at offset 1: must still fold to a
; single i32 load, with the offset carried into the addressing mode.
define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl 1(%eax), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl 1(%rdi), %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}
    630 
    631 ; i8* p;
    632 ; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
; Little-endian byte combine at a negative offset (bytes -4..-1): folds to
; a single i32 load with a -4 displacement.
define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl -4(%eax), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_neg_offset:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl -4(%rdi), %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}
    665 
    666 ; i8* p;
    667 ; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
; Big-endian byte combine of bytes 1..4 (IR lists them highest-address
; first): folds to one i32 load at offset 1 plus bswap/movbe.
define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
; BSWAP-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; BSWAP:       # %bb.0:
; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT:    movl 1(%eax), %eax
; BSWAP-NEXT:    bswapl %eax
; BSWAP-NEXT:    retl
;
; MOVBE-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; MOVBE:       # %bb.0:
; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT:    movbel 1(%eax), %eax
; MOVBE-NEXT:    retl
;
; BSWAP64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; BSWAP64:       # %bb.0:
; BSWAP64-NEXT:    movl 1(%rdi), %eax
; BSWAP64-NEXT:    bswapl %eax
; BSWAP64-NEXT:    retq
;
; MOVBE64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; MOVBE64:       # %bb.0:
; MOVBE64-NEXT:    movbel 1(%rdi), %eax
; MOVBE64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}
    713 
    714 ; i8* p;
    715 ; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
; Big-endian byte combine of bytes -4..-1: folds to one i32 load at
; displacement -4 plus bswap/movbe.
define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
; BSWAP-LABEL: load_i32_by_i8_neg_offset_bswap:
; BSWAP:       # %bb.0:
; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT:    movl -4(%eax), %eax
; BSWAP-NEXT:    bswapl %eax
; BSWAP-NEXT:    retl
;
; MOVBE-LABEL: load_i32_by_i8_neg_offset_bswap:
; MOVBE:       # %bb.0:
; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT:    movbel -4(%eax), %eax
; MOVBE-NEXT:    retl
;
; BSWAP64-LABEL: load_i32_by_i8_neg_offset_bswap:
; BSWAP64:       # %bb.0:
; BSWAP64-NEXT:    movl -4(%rdi), %eax
; BSWAP64-NEXT:    bswapl %eax
; BSWAP64-NEXT:    retq
;
; MOVBE64-LABEL: load_i32_by_i8_neg_offset_bswap:
; MOVBE64:       # %bb.0:
; MOVBE64-NEXT:    movbel -4(%rdi), %eax
; MOVBE64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}
    761 
    762 ; i8* p; i32 i;
    763 ; ((i32) p[i] << 24) | ((i32) p[i + 1] << 16) | ((i32) p[i + 2] << 8) | (i32) p[i + 3]
define i32 @load_i32_by_i8_bswap_base_index_offset(i32* %arg, i32 %arg1) {
; BSWAP-LABEL: load_i32_by_i8_bswap_base_index_offset:
; BSWAP:       # %bb.0:
; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; BSWAP-NEXT:    movl (%ecx,%eax), %eax
; BSWAP-NEXT:    bswapl %eax
; BSWAP-NEXT:    retl
;
; MOVBE-LABEL: load_i32_by_i8_bswap_base_index_offset:
; MOVBE:       # %bb.0:
; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; MOVBE-NEXT:    movbel (%ecx,%eax), %eax
; MOVBE-NEXT:    retl
;
; BSWAP64-LABEL: load_i32_by_i8_bswap_base_index_offset:
; BSWAP64:       # %bb.0:
; BSWAP64-NEXT:    movslq %esi, %rax
; BSWAP64-NEXT:    movl (%rdi,%rax), %eax
; BSWAP64-NEXT:    bswapl %eax
; BSWAP64-NEXT:    retq
;
; MOVBE64-LABEL: load_i32_by_i8_bswap_base_index_offset:
; MOVBE64:       # %bb.0:
; MOVBE64-NEXT:    movslq %esi, %rax
; MOVBE64-NEXT:    movbel (%rdi,%rax), %eax
; MOVBE64-NEXT:    retq
; Big-endian byte gather from base + variable index: the checks above expect a
; single indexed i32 load plus bswap (or movbe) rather than four byte loads.
  %tmp = bitcast i32* %arg to i8*
  %tmp2 = getelementptr inbounds i8, i8* %tmp, i32 %arg1
  %tmp3 = load i8, i8* %tmp2, align 1
  %tmp4 = zext i8 %tmp3 to i32
  %tmp5 = shl nuw nsw i32 %tmp4, 24
  %tmp6 = add nuw nsw i32 %arg1, 1
  %tmp7 = getelementptr inbounds i8, i8* %tmp, i32 %tmp6
  %tmp8 = load i8, i8* %tmp7, align 1
  %tmp9 = zext i8 %tmp8 to i32
  %tmp10 = shl nuw nsw i32 %tmp9, 16
  %tmp11 = or i32 %tmp10, %tmp5
  %tmp12 = add nuw nsw i32 %arg1, 2
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i32 %tmp12
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i32
  %tmp16 = shl nuw nsw i32 %tmp15, 8
  %tmp17 = or i32 %tmp11, %tmp16
  %tmp18 = add nuw nsw i32 %arg1, 3
  %tmp19 = getelementptr inbounds i8, i8* %tmp, i32 %tmp18
  %tmp20 = load i8, i8* %tmp19, align 1
  %tmp21 = zext i8 %tmp20 to i32
  %tmp22 = or i32 %tmp17, %tmp21
  ret i32 %tmp22
}
    816 
    817 ; Verify that we don't crash handling shl i32 %conv57, 32
define void @shift_i32_by_32(i8* %src1, i8* %src2, i64* %dst) {
; CHECK-LABEL: shift_i32_by_32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl $-1, 4(%eax)
; CHECK-NEXT:    movl $-1, (%eax)
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: shift_i32_by_32:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    movq $-1, (%rdx)
; CHECK64-NEXT:    retq
; Both shift amounts (56 and 32) are >= the i32 bit width, so the shifted
; values are poison per the LangRef and the backend is free to pick any result
; (here it stores -1). The point of the test is only that the load-combine
; code does not crash on such shifts.
entry:
  %load1 = load i8, i8* %src1, align 1
  %conv46 = zext i8 %load1 to i32
  %shl47 = shl i32 %conv46, 56
  %or55 = or i32 %shl47, 0
  %load2 = load i8, i8* %src2, align 1
  %conv57 = zext i8 %load2 to i32
  %shl58 = shl i32 %conv57, 32
  %or59 = or i32 %or55, %shl58
  %or74 = or i32 %or59, 0
  %conv75 = sext i32 %or74 to i64
  store i64 %conv75, i64* %dst, align 8
  ret void
}
    844 
    845 declare i16 @llvm.bswap.i16(i16)
    846 
    847 ; i16* p;
; ((i32) bswap(p[0]) << 16) | (i32) bswap(p[1])
define i32 @load_i32_by_bswap_i16(i32* %arg) {
; BSWAP-LABEL: load_i32_by_bswap_i16:
; BSWAP:       # %bb.0:
; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT:    movl (%eax), %eax
; BSWAP-NEXT:    bswapl %eax
; BSWAP-NEXT:    retl
;
; MOVBE-LABEL: load_i32_by_bswap_i16:
; MOVBE:       # %bb.0:
; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT:    movbel (%eax), %eax
; MOVBE-NEXT:    retl
;
; BSWAP64-LABEL: load_i32_by_bswap_i16:
; BSWAP64:       # %bb.0:
; BSWAP64-NEXT:    movl (%rdi), %eax
; BSWAP64-NEXT:    bswapl %eax
; BSWAP64-NEXT:    retq
;
; MOVBE64-LABEL: load_i32_by_bswap_i16:
; MOVBE64:       # %bb.0:
; MOVBE64-NEXT:    movbel (%rdi), %eax
; MOVBE64-NEXT:    retq
; Two adjacent i16 loads, each individually byte-swapped and recombined with
; the halves exchanged: per the checks above this folds to one i32 load plus
; a single 32-bit bswap (or movbe).
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 4
  %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
  %tmp2 = zext i16 %tmp11 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
  %tmp5 = zext i16 %tmp41 to i32
  %tmp6 = shl nuw nsw i32 %tmp2, 16
  %tmp7 = or i32 %tmp6, %tmp5
  ret i32 %tmp7
}
    885 
    886 ; i16* p;
; (i32) p[0] | ((sext(p[1]) to i32) << 16)
define i32 @load_i32_by_sext_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_sext_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl (%eax), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_sext_i16:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl (%rdi), %eax
; CHECK64-NEXT:    retq
; The high half is sign-extended, but the shl by 16 discards all the extended
; bits, so the combine can still treat it like a zext and the checks above
; expect a plain i32 load with no extra instructions.
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 1
  %tmp2 = zext i16 %tmp1 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp5 = sext i16 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 16
  %tmp7 = or i32 %tmp6, %tmp2
  ret i32 %tmp7
}
    909 
    910 ; i8* arg; i32 i;
    911 ; p = arg + 12;
    912 ; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
define i32 @load_i32_by_i8_base_offset_index(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl 12(%ecx,%eax), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_base_offset_index:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl %esi, %eax
; CHECK64-NEXT:    movl 12(%rdi,%rax), %eax
; CHECK64-NEXT:    retq
; Little-endian byte gather at arg + 12 + i (the i32->i64 zexts of i, i+1,
; i+2, i+3 must be reassociated with the constant offset): the checks above
; expect a single i32 load with displacement 12 and the index in a register.
  %tmp = add nuw nsw i32 %i, 3
  %tmp2 = add nuw nsw i32 %i, 2
  %tmp3 = add nuw nsw i32 %i, 1
  %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp5 = zext i32 %i to i64
  %tmp6 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp5
  %tmp7 = load i8, i8* %tmp6, align 1
  %tmp8 = zext i8 %tmp7 to i32
  %tmp9 = zext i32 %tmp3 to i64
  %tmp10 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp9
  %tmp11 = load i8, i8* %tmp10, align 1
  %tmp12 = zext i8 %tmp11 to i32
  %tmp13 = shl nuw nsw i32 %tmp12, 8
  %tmp14 = or i32 %tmp13, %tmp8
  %tmp15 = zext i32 %tmp2 to i64
  %tmp16 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp15
  %tmp17 = load i8, i8* %tmp16, align 1
  %tmp18 = zext i8 %tmp17 to i32
  %tmp19 = shl nuw nsw i32 %tmp18, 16
  %tmp20 = or i32 %tmp14, %tmp19
  %tmp21 = zext i32 %tmp to i64
  %tmp22 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp21
  %tmp23 = load i8, i8* %tmp22, align 1
  %tmp24 = zext i8 %tmp23 to i32
  %tmp25 = shl nuw i32 %tmp24, 24
  %tmp26 = or i32 %tmp20, %tmp25
  ret i32 %tmp26
}
    954 
    955 ; i8* arg; i32 i;
    956 ; p = arg + 12;
    957 ; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl 13(%ecx,%eax), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl %esi, %eax
; CHECK64-NEXT:    movl 13(%rdi,%rax), %eax
; CHECK64-NEXT:    retq
; Same as load_i32_by_i8_base_offset_index but the bytes start at i + 1, so
; the checks above expect the combined load to use displacement 13 (12 + 1).
  %tmp = add nuw nsw i32 %i, 4
  %tmp2 = add nuw nsw i32 %i, 3
  %tmp3 = add nuw nsw i32 %i, 2
  %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp5 = add nuw nsw i32 %i, 1
  %tmp27 = zext i32 %tmp5 to i64
  %tmp28 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp27
  %tmp29 = load i8, i8* %tmp28, align 1
  %tmp30 = zext i8 %tmp29 to i32
  %tmp31 = zext i32 %tmp3 to i64
  %tmp32 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp31
  %tmp33 = load i8, i8* %tmp32, align 1
  %tmp34 = zext i8 %tmp33 to i32
  %tmp35 = shl nuw nsw i32 %tmp34, 8
  %tmp36 = or i32 %tmp35, %tmp30
  %tmp37 = zext i32 %tmp2 to i64
  %tmp38 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp37
  %tmp39 = load i8, i8* %tmp38, align 1
  %tmp40 = zext i8 %tmp39 to i32
  %tmp41 = shl nuw nsw i32 %tmp40, 16
  %tmp42 = or i32 %tmp36, %tmp41
  %tmp43 = zext i32 %tmp to i64
  %tmp44 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp43
  %tmp45 = load i8, i8* %tmp44, align 1
  %tmp46 = zext i8 %tmp45 to i32
  %tmp47 = shl nuw i32 %tmp46, 24
  %tmp48 = or i32 %tmp42, %tmp47
  ret i32 %tmp48
}
   1000 
   1001 ; i8* arg; i32 i;
   1002 ;
; p0 = arg + i;
   1004 ; p1 = arg + i + 1;
   1005 ; p2 = arg + i + 2;
   1006 ; p3 = arg + i + 3;
   1007 ;
   1008 ; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24)
   1009 ;
; This test exercises zero and any extend loads as a part of load combine pattern.
; In order to fold the pattern above we need to reassociate the address computation
; first. By the time the address computation is reassociated loads are combined
; to zext and aext loads.
define i32 @load_i32_by_i8_zaext_loads(i8* %arg, i32 %arg1) {
; CHECK-LABEL: load_i32_by_i8_zaext_loads:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl 12(%ecx,%eax), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_zaext_loads:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl %esi, %eax
; CHECK64-NEXT:    movl 12(%rdi,%rax), %eax
; CHECK64-NEXT:    retq
; The four bytes live at arg + i + 12 .. arg + i + 15, but each address is
; built through a differently-shaped GEP chain; after reassociation the
; checks above expect one i32 load with displacement 12.
  %tmp = add nuw nsw i32 %arg1, 3
  %tmp2 = add nuw nsw i32 %arg1, 2
  %tmp3 = add nuw nsw i32 %arg1, 1
  %tmp4 = zext i32 %tmp to i64
  %tmp5 = zext i32 %tmp2 to i64
  %tmp6 = zext i32 %tmp3 to i64
  %tmp24 = getelementptr inbounds i8, i8* %arg, i64 %tmp4
  %tmp30 = getelementptr inbounds i8, i8* %arg, i64 %tmp5
  %tmp31 = getelementptr inbounds i8, i8* %arg, i64 %tmp6
  %tmp32 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp33 = zext i32 %arg1 to i64
  %tmp34 = getelementptr inbounds i8, i8* %tmp32, i64 %tmp33
  %tmp35 = load i8, i8* %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i32
  %tmp37 = getelementptr inbounds i8, i8* %tmp31, i64 12
  %tmp38 = load i8, i8* %tmp37, align 1
  %tmp39 = zext i8 %tmp38 to i32
  %tmp40 = shl nuw nsw i32 %tmp39, 8
  %tmp41 = or i32 %tmp40, %tmp36
  %tmp42 = getelementptr inbounds i8, i8* %tmp30, i64 12
  %tmp43 = load i8, i8* %tmp42, align 1
  %tmp44 = zext i8 %tmp43 to i32
  %tmp45 = shl nuw nsw i32 %tmp44, 16
  %tmp46 = or i32 %tmp41, %tmp45
  %tmp47 = getelementptr inbounds i8, i8* %tmp24, i64 12
  %tmp48 = load i8, i8* %tmp47, align 1
  %tmp49 = zext i8 %tmp48 to i32
  %tmp50 = shl nuw i32 %tmp49, 24
  %tmp51 = or i32 %tmp46, %tmp50
  ret i32 %tmp51
}
   1058 
   1059 ; The same as load_i32_by_i8_zaext_loads but the last load is combined to
   1060 ; a sext load.
   1061 ;
   1062 ; i8* arg; i32 i;
   1063 ;
; p0 = arg + i;
   1065 ; p1 = arg + i + 1;
   1066 ; p2 = arg + i + 2;
   1067 ; p3 = arg + i + 3;
   1068 ;
   1069 ; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24)
define i32 @load_i32_by_i8_zsext_loads(i8* %arg, i32 %arg1) {
; CHECK-LABEL: load_i32_by_i8_zsext_loads:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl 12(%ecx,%eax), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_zsext_loads:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl %esi, %eax
; CHECK64-NEXT:    movl 12(%rdi,%rax), %eax
; CHECK64-NEXT:    retq
; The last byte goes through sext-to-i16 then zext-to-i32, but the shl by 24
; keeps only the low 8 bits of that value, so the combine can still fold the
; whole pattern into the single i32 load expected by the checks above.
  %tmp = add nuw nsw i32 %arg1, 3
  %tmp2 = add nuw nsw i32 %arg1, 2
  %tmp3 = add nuw nsw i32 %arg1, 1
  %tmp4 = zext i32 %tmp to i64
  %tmp5 = zext i32 %tmp2 to i64
  %tmp6 = zext i32 %tmp3 to i64
  %tmp24 = getelementptr inbounds i8, i8* %arg, i64 %tmp4
  %tmp30 = getelementptr inbounds i8, i8* %arg, i64 %tmp5
  %tmp31 = getelementptr inbounds i8, i8* %arg, i64 %tmp6
  %tmp32 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp33 = zext i32 %arg1 to i64
  %tmp34 = getelementptr inbounds i8, i8* %tmp32, i64 %tmp33
  %tmp35 = load i8, i8* %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i32
  %tmp37 = getelementptr inbounds i8, i8* %tmp31, i64 12
  %tmp38 = load i8, i8* %tmp37, align 1
  %tmp39 = zext i8 %tmp38 to i32
  %tmp40 = shl nuw nsw i32 %tmp39, 8
  %tmp41 = or i32 %tmp40, %tmp36
  %tmp42 = getelementptr inbounds i8, i8* %tmp30, i64 12
  %tmp43 = load i8, i8* %tmp42, align 1
  %tmp44 = zext i8 %tmp43 to i32
  %tmp45 = shl nuw nsw i32 %tmp44, 16
  %tmp46 = or i32 %tmp41, %tmp45
  %tmp47 = getelementptr inbounds i8, i8* %tmp24, i64 12
  %tmp48 = load i8, i8* %tmp47, align 1
  %tmp49 = sext i8 %tmp48 to i16
  %tmp50 = zext i16 %tmp49 to i32
  %tmp51 = shl nuw i32 %tmp50, 24
  %tmp52 = or i32 %tmp46, %tmp51
  ret i32 %tmp52
}
   1115 
   1116 ; i8* p;
   1117 ; (i32) p[0] | ((i32) p[1] << 8)
define i32 @zext_load_i32_by_i8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movzbl (%eax), %ecx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movzbl (%eax), %ecx
; CHECK-NEXT:    movzbl 1(%eax), %eax
; CHECK-NEXT:    shll $8, %eax
; CHECK-NEXT:    orl %ecx, %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: zext_load_i32_by_i8:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl (%rdi), %ecx
; CHECK64-NEXT:    movzbl 1(%rdi), %eax
; CHECK64-NEXT:    shll $8, %eax
; CHECK64-NEXT:    orl %ecx, %eax
; CHECK64-NEXT:    retq
; Only two of the four bytes of the i32 are combined; the checks above show
; that codegen currently keeps the two byte loads rather than emitting a
; single 16-bit load.
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  ret i32 %tmp8
}
   1146 
   1147 ; i8* p;
   1148 ; ((i32) p[0] << 8) | ((i32) p[1] << 16)
define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movzbl (%eax), %ecx
; CHECK-NEXT:    shll $8, %ecx
; CHECK-NEXT:    movzbl 1(%eax), %eax
; CHECK-NEXT:    shll $16, %eax
; CHECK-NEXT:    orl %ecx, %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_shl_8:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl (%rdi), %ecx
; CHECK64-NEXT:    shll $8, %ecx
; CHECK64-NEXT:    movzbl 1(%rdi), %eax
; CHECK64-NEXT:    shll $16, %eax
; CHECK64-NEXT:    orl %ecx, %eax
; CHECK64-NEXT:    retq
; Two-byte pattern placed at bit offset 8: the checks above show the byte
; loads and shifts are kept as-is (no combined wide load).
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 8
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}
   1180 
   1181 ; i8* p;
   1182 ; ((i32) p[0] << 16) | ((i32) p[1] << 24)
define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movzbl (%eax), %ecx
; CHECK-NEXT:    shll $16, %ecx
; CHECK-NEXT:    movzbl 1(%eax), %eax
; CHECK-NEXT:    shll $24, %eax
; CHECK-NEXT:    orl %ecx, %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_shl_16:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl (%rdi), %ecx
; CHECK64-NEXT:    shll $16, %ecx
; CHECK64-NEXT:    movzbl 1(%rdi), %eax
; CHECK64-NEXT:    shll $24, %eax
; CHECK64-NEXT:    orl %ecx, %eax
; CHECK64-NEXT:    retq
; Two-byte pattern placed at bit offset 16: the checks above show the byte
; loads and shifts are kept as-is (no combined wide load).
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 16
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 24
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}
   1214 
   1215 ; i8* p;
   1216 ; (i32) p[1] | ((i32) p[0] << 8)
define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movzbl 1(%eax), %ecx
; CHECK-NEXT:    movzbl (%eax), %eax
; CHECK-NEXT:    shll $8, %eax
; CHECK-NEXT:    orl %ecx, %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_bswap:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
; CHECK64-NEXT:    movzbl (%rdi), %eax
; CHECK64-NEXT:    shll $8, %eax
; CHECK64-NEXT:    orl %ecx, %eax
; CHECK64-NEXT:    retq
; Big-endian variant of the two-byte pattern: the checks above show the two
; byte loads are kept rather than being combined into a swapped 16-bit load.
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  ret i32 %tmp8
}
   1245 
   1246 ; i8* p;
   1247 ; ((i32) p[1] << 8) | ((i32) p[0] << 16)
define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movzbl 1(%eax), %ecx
; CHECK-NEXT:    shll $8, %ecx
; CHECK-NEXT:    movzbl (%eax), %eax
; CHECK-NEXT:    shll $16, %eax
; CHECK-NEXT:    orl %ecx, %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
; CHECK64-NEXT:    shll $8, %ecx
; CHECK64-NEXT:    movzbl (%rdi), %eax
; CHECK64-NEXT:    shll $16, %eax
; CHECK64-NEXT:    orl %ecx, %eax
; CHECK64-NEXT:    retq
; Big-endian two-byte pattern placed at bit offset 8: the checks above show
; the byte loads and shifts are kept as-is (no combined wide load).
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 8
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}
   1279 
   1280 ; i8* p;
   1281 ; ((i32) p[1] << 16) | ((i32) p[0] << 24)
define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movzbl 1(%eax), %ecx
; CHECK-NEXT:    shll $16, %ecx
; CHECK-NEXT:    movzbl (%eax), %eax
; CHECK-NEXT:    shll $24, %eax
; CHECK-NEXT:    orl %ecx, %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
; CHECK64-NEXT:    shll $16, %ecx
; CHECK64-NEXT:    movzbl (%rdi), %eax
; CHECK64-NEXT:    shll $24, %eax
; CHECK64-NEXT:    orl %ecx, %eax
; CHECK64-NEXT:    retq
; Big-endian two-byte pattern placed at bit offset 16: the checks above show
; the byte loads and shifts are kept as-is (no combined wide load).
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 16
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 24
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}
   1313