Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc < %s -mtriple=arm64-unknown | FileCheck %s
      2 
      3 ; i8* p; // p is 1 byte aligned; the four byte loads are still expected to merge into one 32-bit ldr
      4 ; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
      5 define i32 @load_i32_by_i8_unaligned(i32* %arg) {
      6 ; CHECK-LABEL: load_i32_by_i8_unaligned:
      7 ; CHECK: ldr		w0, [x0]
      8 ; CHECK-NEXT: ret
      9   %tmp = bitcast i32* %arg to i8*
     10   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
     11   %tmp2 = load i8, i8* %tmp1, align 1 ; p[0]
     12   %tmp3 = zext i8 %tmp2 to i32
     13   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
     14   %tmp5 = load i8, i8* %tmp4, align 1 ; p[1]
     15   %tmp6 = zext i8 %tmp5 to i32
     16   %tmp7 = shl nuw nsw i32 %tmp6, 8
     17   %tmp8 = or i32 %tmp7, %tmp3
     18   %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
     19   %tmp10 = load i8, i8* %tmp9, align 1 ; p[2]
     20   %tmp11 = zext i8 %tmp10 to i32
     21   %tmp12 = shl nuw nsw i32 %tmp11, 16
     22   %tmp13 = or i32 %tmp8, %tmp12
     23   %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
     24   %tmp15 = load i8, i8* %tmp14, align 1 ; p[3]
     25   %tmp16 = zext i8 %tmp15 to i32
     26   %tmp17 = shl nuw i32 %tmp16, 24 ; no nsw: a byte >= 0x80 shifted by 24 sets the sign bit
     27   %tmp18 = or i32 %tmp13, %tmp17
     28   ret i32 %tmp18
     29 }
     30 
     31 ; i8* p; // p is 4 byte aligned; four little-endian byte loads expected to merge into one 32-bit ldr
     32 ; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
     33 define i32 @load_i32_by_i8_aligned(i32* %arg) {
     34 ; CHECK-LABEL: load_i32_by_i8_aligned:
     35 ; CHECK: ldr    w0, [x0]
     36 ; CHECK-NEXT: ret
     37   %tmp = bitcast i32* %arg to i8*
     38   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
     39   %tmp2 = load i8, i8* %tmp1, align 4 ; p[0], carries the 4-byte alignment
     40   %tmp3 = zext i8 %tmp2 to i32
     41   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
     42   %tmp5 = load i8, i8* %tmp4, align 1 ; p[1]
     43   %tmp6 = zext i8 %tmp5 to i32
     44   %tmp7 = shl nuw nsw i32 %tmp6, 8
     45   %tmp8 = or i32 %tmp7, %tmp3
     46   %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
     47   %tmp10 = load i8, i8* %tmp9, align 1 ; p[2]
     48   %tmp11 = zext i8 %tmp10 to i32
     49   %tmp12 = shl nuw nsw i32 %tmp11, 16
     50   %tmp13 = or i32 %tmp8, %tmp12
     51   %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
     52   %tmp15 = load i8, i8* %tmp14, align 1 ; p[3]
     53   %tmp16 = zext i8 %tmp15 to i32
     54   %tmp17 = shl nuw i32 %tmp16, 24 ; no nsw: a byte >= 0x80 shifted by 24 sets the sign bit
     55   %tmp18 = or i32 %tmp13, %tmp17
     56   ret i32 %tmp18
     57 }
     58 
     59 ; i8* p; // p is 4 byte aligned; bytes are combined big-endian, so the expected output is one ldr followed by rev
     60 ; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
     61 define i32 @load_i32_by_i8_bswap(i32* %arg) {
     62 ; CHECK-LABEL: load_i32_by_i8_bswap:
     63 ; CHECK: ldr		w8, [x0]
     64 ; CHECK-NEXT: rev	w0, w8
     65 ; CHECK-NEXT: ret
     66   %tmp = bitcast i32* %arg to i8*
     67   %tmp1 = load i8, i8* %tmp, align 4 ; p[0], carries the 4-byte alignment
     68   %tmp2 = zext i8 %tmp1 to i32
     69   %tmp3 = shl nuw i32 %tmp2, 24 ; no nsw: a byte >= 0x80 shifted by 24 sets the sign bit
     70   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
     71   %tmp5 = load i8, i8* %tmp4, align 1 ; p[1]
     72   %tmp6 = zext i8 %tmp5 to i32
     73   %tmp7 = shl nuw nsw i32 %tmp6, 16
     74   %tmp8 = or i32 %tmp7, %tmp3
     75   %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
     76   %tmp10 = load i8, i8* %tmp9, align 1 ; p[2]
     77   %tmp11 = zext i8 %tmp10 to i32
     78   %tmp12 = shl nuw nsw i32 %tmp11, 8
     79   %tmp13 = or i32 %tmp8, %tmp12
     80   %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
     81   %tmp15 = load i8, i8* %tmp14, align 1 ; p[3]
     82   %tmp16 = zext i8 %tmp15 to i32
     83   %tmp17 = or i32 %tmp13, %tmp16
     84   ret i32 %tmp17
     85 }
     86 
     87 ; i8* p; // p is 8 byte aligned; eight little-endian byte loads expected to merge into one 64-bit ldr
     88 ; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
     89 define i64 @load_i64_by_i8(i64* %arg) {
     90 ; CHECK-LABEL: load_i64_by_i8:
     91 ; CHECK: ldr		x0, [x0]
     92 ; CHECK-NEXT: ret
     93   %tmp = bitcast i64* %arg to i8*
     94   %tmp1 = load i8, i8* %tmp, align 8 ; p[0], carries the 8-byte alignment
     95   %tmp2 = zext i8 %tmp1 to i64
     96   %tmp3 = getelementptr inbounds i8, i8* %tmp, i64 1
     97   %tmp4 = load i8, i8* %tmp3, align 1 ; p[1]
     98   %tmp5 = zext i8 %tmp4 to i64
     99   %tmp6 = shl nuw nsw i64 %tmp5, 8
    100   %tmp7 = or i64 %tmp6, %tmp2
    101   %tmp8 = getelementptr inbounds i8, i8* %tmp, i64 2
    102   %tmp9 = load i8, i8* %tmp8, align 1 ; p[2]
    103   %tmp10 = zext i8 %tmp9 to i64
    104   %tmp11 = shl nuw nsw i64 %tmp10, 16
    105   %tmp12 = or i64 %tmp7, %tmp11
    106   %tmp13 = getelementptr inbounds i8, i8* %tmp, i64 3
    107   %tmp14 = load i8, i8* %tmp13, align 1 ; p[3]
    108   %tmp15 = zext i8 %tmp14 to i64
    109   %tmp16 = shl nuw nsw i64 %tmp15, 24
    110   %tmp17 = or i64 %tmp12, %tmp16
    111   %tmp18 = getelementptr inbounds i8, i8* %tmp, i64 4
    112   %tmp19 = load i8, i8* %tmp18, align 1 ; p[4]
    113   %tmp20 = zext i8 %tmp19 to i64
    114   %tmp21 = shl nuw nsw i64 %tmp20, 32
    115   %tmp22 = or i64 %tmp17, %tmp21
    116   %tmp23 = getelementptr inbounds i8, i8* %tmp, i64 5
    117   %tmp24 = load i8, i8* %tmp23, align 1 ; p[5]
    118   %tmp25 = zext i8 %tmp24 to i64
    119   %tmp26 = shl nuw nsw i64 %tmp25, 40
    120   %tmp27 = or i64 %tmp22, %tmp26
    121   %tmp28 = getelementptr inbounds i8, i8* %tmp, i64 6
    122   %tmp29 = load i8, i8* %tmp28, align 1 ; p[6]
    123   %tmp30 = zext i8 %tmp29 to i64
    124   %tmp31 = shl nuw nsw i64 %tmp30, 48
    125   %tmp32 = or i64 %tmp27, %tmp31
    126   %tmp33 = getelementptr inbounds i8, i8* %tmp, i64 7
    127   %tmp34 = load i8, i8* %tmp33, align 1 ; p[7]
    128   %tmp35 = zext i8 %tmp34 to i64
    129   %tmp36 = shl nuw i64 %tmp35, 56 ; nuw only: the top byte can set the sign bit
    130   %tmp37 = or i64 %tmp32, %tmp36
    131   ret i64 %tmp37
    132 }
    133 
    134 ; i8* p; // p is 8 byte aligned; bytes are combined big-endian, so the expected output is one 64-bit ldr followed by rev
    135 ; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
    136 define i64 @load_i64_by_i8_bswap(i64* %arg) {
    137 ; CHECK-LABEL: load_i64_by_i8_bswap:
    138 ; CHECK: ldr		x8, [x0]
    139 ; CHECK-NEXT: rev	x0, x8
    140 ; CHECK-NEXT: ret
    141   %tmp = bitcast i64* %arg to i8*
    142   %tmp1 = load i8, i8* %tmp, align 8 ; p[0], carries the 8-byte alignment
    143   %tmp2 = zext i8 %tmp1 to i64
    144   %tmp3 = shl nuw i64 %tmp2, 56 ; nuw only: the top byte can set the sign bit
    145   %tmp4 = getelementptr inbounds i8, i8* %tmp, i64 1
    146   %tmp5 = load i8, i8* %tmp4, align 1 ; p[1]
    147   %tmp6 = zext i8 %tmp5 to i64
    148   %tmp7 = shl nuw nsw i64 %tmp6, 48
    149   %tmp8 = or i64 %tmp7, %tmp3
    150   %tmp9 = getelementptr inbounds i8, i8* %tmp, i64 2
    151   %tmp10 = load i8, i8* %tmp9, align 1 ; p[2]
    152   %tmp11 = zext i8 %tmp10 to i64
    153   %tmp12 = shl nuw nsw i64 %tmp11, 40
    154   %tmp13 = or i64 %tmp8, %tmp12
    155   %tmp14 = getelementptr inbounds i8, i8* %tmp, i64 3
    156   %tmp15 = load i8, i8* %tmp14, align 1 ; p[3]
    157   %tmp16 = zext i8 %tmp15 to i64
    158   %tmp17 = shl nuw nsw i64 %tmp16, 32
    159   %tmp18 = or i64 %tmp13, %tmp17
    160   %tmp19 = getelementptr inbounds i8, i8* %tmp, i64 4
    161   %tmp20 = load i8, i8* %tmp19, align 1 ; p[4]
    162   %tmp21 = zext i8 %tmp20 to i64
    163   %tmp22 = shl nuw nsw i64 %tmp21, 24
    164   %tmp23 = or i64 %tmp18, %tmp22
    165   %tmp24 = getelementptr inbounds i8, i8* %tmp, i64 5
    166   %tmp25 = load i8, i8* %tmp24, align 1 ; p[5]
    167   %tmp26 = zext i8 %tmp25 to i64
    168   %tmp27 = shl nuw nsw i64 %tmp26, 16
    169   %tmp28 = or i64 %tmp23, %tmp27
    170   %tmp29 = getelementptr inbounds i8, i8* %tmp, i64 6
    171   %tmp30 = load i8, i8* %tmp29, align 1 ; p[6]
    172   %tmp31 = zext i8 %tmp30 to i64
    173   %tmp32 = shl nuw nsw i64 %tmp31, 8
    174   %tmp33 = or i64 %tmp28, %tmp32
    175   %tmp34 = getelementptr inbounds i8, i8* %tmp, i64 7
    176   %tmp35 = load i8, i8* %tmp34, align 1 ; p[7]
    177   %tmp36 = zext i8 %tmp35 to i64
    178   %tmp37 = or i64 %tmp33, %tmp36
    179   ret i64 %tmp37
    180 }
    181 
    182 ; i8* p; // p[1] is 4 byte aligned; the combined load starts at byte offset 1, so the expected output is one ldur
    183 ; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
    184 define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
    185 ; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
    186 ; CHECK: ldur w0, [x0, #1]
    187 ; CHECK-NEXT: ret
    188 
    189   %tmp = bitcast i32* %arg to i8*
    190   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
    191   %tmp2 = load i8, i8* %tmp1, align 4 ; p[1], lowest byte of the result
    192   %tmp3 = zext i8 %tmp2 to i32
    193   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
    194   %tmp5 = load i8, i8* %tmp4, align 1 ; p[2]
    195   %tmp6 = zext i8 %tmp5 to i32
    196   %tmp7 = shl nuw nsw i32 %tmp6, 8
    197   %tmp8 = or i32 %tmp7, %tmp3
    198   %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
    199   %tmp10 = load i8, i8* %tmp9, align 1 ; p[3]
    200   %tmp11 = zext i8 %tmp10 to i32
    201   %tmp12 = shl nuw nsw i32 %tmp11, 16
    202   %tmp13 = or i32 %tmp8, %tmp12
    203   %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
    204   %tmp15 = load i8, i8* %tmp14, align 1 ; p[4]
    205   %tmp16 = zext i8 %tmp15 to i32
    206   %tmp17 = shl nuw i32 %tmp16, 24 ; no nsw: a byte >= 0x80 shifted by 24 sets the sign bit
    207   %tmp18 = or i32 %tmp13, %tmp17
    208   ret i32 %tmp18
    209 }
    210 
    211 ; i8* p; // p[-4] is 4 byte aligned; the combined load starts at byte offset -4, so the expected output is one ldur
    212 ; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
    213 define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
    214 ; CHECK-LABEL: load_i32_by_i8_neg_offset:
    215 ; CHECK: ldur w0, [x0, #-4]
    216 ; CHECK-NEXT: ret
    217 
    218   %tmp = bitcast i32* %arg to i8*
    219   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
    220   %tmp2 = load i8, i8* %tmp1, align 4 ; p[-4], lowest byte of the result
    221   %tmp3 = zext i8 %tmp2 to i32
    222   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
    223   %tmp5 = load i8, i8* %tmp4, align 1 ; p[-3]
    224   %tmp6 = zext i8 %tmp5 to i32
    225   %tmp7 = shl nuw nsw i32 %tmp6, 8
    226   %tmp8 = or i32 %tmp7, %tmp3
    227   %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
    228   %tmp10 = load i8, i8* %tmp9, align 1 ; p[-2]
    229   %tmp11 = zext i8 %tmp10 to i32
    230   %tmp12 = shl nuw nsw i32 %tmp11, 16
    231   %tmp13 = or i32 %tmp8, %tmp12
    232   %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
    233   %tmp15 = load i8, i8* %tmp14, align 1 ; p[-1]
    234   %tmp16 = zext i8 %tmp15 to i32
    235   %tmp17 = shl nuw i32 %tmp16, 24 ; no nsw: a byte >= 0x80 shifted by 24 sets the sign bit
    236   %tmp18 = or i32 %tmp13, %tmp17
    237   ret i32 %tmp18
    238 }
    239 
    240 ; i8* p; // p[1] is 4 byte aligned; bytes are combined big-endian from offset 1, so the expected output is ldur followed by rev
    241 ; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
    242 define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
    243 ; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
    244 ; CHECK: ldur  w8, [x0, #1]
    245 ; CHECK-NEXT: rev w0, w8
    246 ; CHECK-NEXT: ret
    247 
    248   %tmp = bitcast i32* %arg to i8*
    249   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
    250   %tmp2 = load i8, i8* %tmp1, align 1 ; p[4], lowest byte of the result
    251   %tmp3 = zext i8 %tmp2 to i32
    252   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
    253   %tmp5 = load i8, i8* %tmp4, align 1 ; p[3]
    254   %tmp6 = zext i8 %tmp5 to i32
    255   %tmp7 = shl nuw nsw i32 %tmp6, 8
    256   %tmp8 = or i32 %tmp7, %tmp3
    257   %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
    258   %tmp10 = load i8, i8* %tmp9, align 1 ; p[2]
    259   %tmp11 = zext i8 %tmp10 to i32
    260   %tmp12 = shl nuw nsw i32 %tmp11, 16
    261   %tmp13 = or i32 %tmp8, %tmp12
    262   %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
    263   %tmp15 = load i8, i8* %tmp14, align 4 ; p[1], carries the 4-byte alignment
    264   %tmp16 = zext i8 %tmp15 to i32
    265   %tmp17 = shl nuw i32 %tmp16, 24 ; no nsw: a byte >= 0x80 shifted by 24 sets the sign bit
    266   %tmp18 = or i32 %tmp13, %tmp17
    267   ret i32 %tmp18
    268 }
    269 
    270 ; i8* p; // p[-4] is 4 byte aligned; bytes are combined big-endian from offset -4, so the expected output is ldur followed by rev
    271 ; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
    272 define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
    273 ; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
    274 ; CHECK: ldur  w8, [x0, #-4]
    275 ; CHECK-NEXT: rev w0, w8
    276 ; CHECK-NEXT: ret
    277 
    278   %tmp = bitcast i32* %arg to i8*
    279   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
    280   %tmp2 = load i8, i8* %tmp1, align 1 ; p[-1], lowest byte of the result
    281   %tmp3 = zext i8 %tmp2 to i32
    282   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
    283   %tmp5 = load i8, i8* %tmp4, align 1 ; p[-2]
    284   %tmp6 = zext i8 %tmp5 to i32
    285   %tmp7 = shl nuw nsw i32 %tmp6, 8
    286   %tmp8 = or i32 %tmp7, %tmp3
    287   %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
    288   %tmp10 = load i8, i8* %tmp9, align 1 ; p[-3]
    289   %tmp11 = zext i8 %tmp10 to i32
    290   %tmp12 = shl nuw nsw i32 %tmp11, 16
    291   %tmp13 = or i32 %tmp8, %tmp12
    292   %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
    293   %tmp15 = load i8, i8* %tmp14, align 4 ; p[-4], carries the 4-byte alignment
    294   %tmp16 = zext i8 %tmp15 to i32
    295   %tmp17 = shl nuw i32 %tmp16, 24 ; no nsw: a byte >= 0x80 shifted by 24 sets the sign bit
    296   %tmp18 = or i32 %tmp13, %tmp17
    297   ret i32 %tmp18
    298 }
    299 
    300 declare i16 @llvm.bswap.i16(i16)
    301 
    302 ; i16* p; // p is 4 byte aligned; two byte-swapped halves, expected to merge into one ldr followed by rev
    303 ; (i32) bswap(p[1]) | ((i32) bswap(p[0]) << 16)
    304 define i32 @load_i32_by_bswap_i16(i32* %arg) {
    305 ; CHECK-LABEL: load_i32_by_bswap_i16:
    306 ; CHECK: ldr    w8, [x0]
    307 ; CHECK-NEXT: rev w0, w8
    308 ; CHECK-NEXT: ret
    309 
    310   %tmp = bitcast i32* %arg to i16*
    311   %tmp1 = load i16, i16* %tmp, align 4 ; p[0]
    312   %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
    313   %tmp2 = zext i16 %tmp11 to i32
    314   %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
    315   %tmp4 = load i16, i16* %tmp3, align 1 ; p[1]
    316   %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
    317   %tmp5 = zext i16 %tmp41 to i32
    318   %tmp6 = shl nuw i32 %tmp2, 16 ; no nsw: a half with bit 15 set shifted by 16 sets the sign bit
    319   %tmp7 = or i32 %tmp6, %tmp5
    320   ret i32 %tmp7
    321 }
    322 
    323 ; i16* p; // p is 4 byte aligned; the high half is sign-extended before the shift, expected to merge into one ldr
    324 ; (i32) p[0] | ((sext(p[1]) to i32) << 16)
    325 define i32 @load_i32_by_sext_i16(i32* %arg) {
    326 ; CHECK-LABEL: load_i32_by_sext_i16:
    327 ; CHECK: ldr   w0, [x0]
    328 ; CHECK-NEXT: ret
    329   %tmp = bitcast i32* %arg to i16*
    330   %tmp1 = load i16, i16* %tmp, align 4 ; p[0]
    331   %tmp2 = zext i16 %tmp1 to i32
    332   %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
    333   %tmp4 = load i16, i16* %tmp3, align 1 ; p[1]
    334   %tmp5 = sext i16 %tmp4 to i32
    335   %tmp6 = shl nsw i32 %tmp5, 16 ; no nuw: a negative half has ones in the upper 16 bits, which the shift discards
    336   %tmp7 = or i32 %tmp6, %tmp2
    337   ret i32 %tmp7
    338 }
    339 
    340 ; i8* arg; i32 i; // bytes are addressed as constant base offset plus variable index
    341 ; p = arg + 12;
    342 ; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
    343 define i32 @load_i32_by_i8_base_offset_index(i8* %arg, i32 %i) {
    344 ; CHECK-LABEL: load_i32_by_i8_base_offset_index:
    345 ; CHECK: add x8, x0, w1, uxtw
    346 ; CHECK-NEXT: ldr w0, [x8, #12]
    347 ; CHECK-NEXT: ret
    348   %tmp = add nuw nsw i32 %i, 3
    349   %tmp2 = add nuw nsw i32 %i, 2
    350   %tmp3 = add nuw nsw i32 %i, 1
    351   %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12 ; p = arg + 12
    352   %tmp5 = zext i32 %i to i64
    353   %tmp6 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp5
    354   %tmp7 = load i8, i8* %tmp6, align 4 ; p[i]
    355   %tmp8 = zext i8 %tmp7 to i32
    356   %tmp9 = zext i32 %tmp3 to i64
    357   %tmp10 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp9
    358   %tmp11 = load i8, i8* %tmp10, align 1 ; p[i + 1]
    359   %tmp12 = zext i8 %tmp11 to i32
    360   %tmp13 = shl nuw nsw i32 %tmp12, 8
    361   %tmp14 = or i32 %tmp13, %tmp8
    362   %tmp15 = zext i32 %tmp2 to i64
    363   %tmp16 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp15
    364   %tmp17 = load i8, i8* %tmp16, align 1 ; p[i + 2]
    365   %tmp18 = zext i8 %tmp17 to i32
    366   %tmp19 = shl nuw nsw i32 %tmp18, 16
    367   %tmp20 = or i32 %tmp14, %tmp19
    368   %tmp21 = zext i32 %tmp to i64
    369   %tmp22 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp21
    370   %tmp23 = load i8, i8* %tmp22, align 1 ; p[i + 3]
    371   %tmp24 = zext i8 %tmp23 to i32
    372   %tmp25 = shl nuw i32 %tmp24, 24 ; nuw only: the top byte can set the sign bit
    373   %tmp26 = or i32 %tmp20, %tmp25
    374   ret i32 %tmp26
    375 }
    376 
    377 ; i8* arg; i32 i; // same as above but starting at p[i + 1]; expected output folds 12 + 1 into the ldur offset
    378 ; p = arg + 12;
    379 ; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
    380 define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
    381 ; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
    382 ; CHECK: add x8, x0, w1, uxtw
    383 ; CHECK-NEXT: ldur  w0, [x8, #13]
    384 ; CHECK-NEXT: ret
    385   %tmp = add nuw nsw i32 %i, 4
    386   %tmp2 = add nuw nsw i32 %i, 3
    387   %tmp3 = add nuw nsw i32 %i, 2
    388   %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12 ; p = arg + 12
    389   %tmp5 = add nuw nsw i32 %i, 1
    390   %tmp27 = zext i32 %tmp5 to i64
    391   %tmp28 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp27
    392   %tmp29 = load i8, i8* %tmp28, align 4 ; p[i + 1]
    393   %tmp30 = zext i8 %tmp29 to i32
    394   %tmp31 = zext i32 %tmp3 to i64
    395   %tmp32 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp31
    396   %tmp33 = load i8, i8* %tmp32, align 1 ; p[i + 2]
    397   %tmp34 = zext i8 %tmp33 to i32
    398   %tmp35 = shl nuw nsw i32 %tmp34, 8
    399   %tmp36 = or i32 %tmp35, %tmp30
    400   %tmp37 = zext i32 %tmp2 to i64
    401   %tmp38 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp37
    402   %tmp39 = load i8, i8* %tmp38, align 1 ; p[i + 3]
    403   %tmp40 = zext i8 %tmp39 to i32
    404   %tmp41 = shl nuw nsw i32 %tmp40, 16
    405   %tmp42 = or i32 %tmp36, %tmp41
    406   %tmp43 = zext i32 %tmp to i64
    407   %tmp44 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp43
    408   %tmp45 = load i8, i8* %tmp44, align 1 ; p[i + 4]
    409   %tmp46 = zext i8 %tmp45 to i32
    410   %tmp47 = shl nuw i32 %tmp46, 24 ; nuw only: the top byte can set the sign bit
    411   %tmp48 = or i32 %tmp42, %tmp47
    412   ret i32 %tmp48
    413 }
    414 
    415 ; i8* p; // p is 2 byte aligned; only two bytes feed the i32, and the expected output keeps two ldrb joined by bfi
    416 ; (i32) p[0] | ((i32) p[1] << 8)
    417 define i32 @zext_load_i32_by_i8(i32* %arg) {
    418 ; CHECK-LABEL: zext_load_i32_by_i8:
    419 ; CHECK: ldrb  w8, [x0]
    420 ; CHECK-NEXT: ldrb  w9, [x0, #1]
    421 ; CHECK-NEXT: bfi w8, w9, #8, #8
    422 ; CHECK-NEXT: mov  w0, w8
    423 ; CHECK-NEXT: ret
    424 
    425   %tmp = bitcast i32* %arg to i8*
    426   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
    427   %tmp2 = load i8, i8* %tmp1, align 2 ; p[0]
    428   %tmp3 = zext i8 %tmp2 to i32
    429   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
    430   %tmp5 = load i8, i8* %tmp4, align 1 ; p[1]
    431   %tmp6 = zext i8 %tmp5 to i32
    432   %tmp7 = shl nuw nsw i32 %tmp6, 8
    433   %tmp8 = or i32 %tmp7, %tmp3
    434   ret i32 %tmp8
    435 }
    436 
    437 ; i8* p; // p is 2 byte aligned; the two-byte value is placed 8 bits up, and the expected output keeps two ldrb plus lsl/bfi
    438 ; ((i32) p[0] << 8) | ((i32) p[1] << 16)
    439 define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) {
    440 ; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
    441 ; CHECK: ldrb  w8, [x0]
    442 ; CHECK-NEXT: ldrb  w9, [x0, #1]
    443 ; CHECK-NEXT: lsl w0, w8, #8
    444 ; CHECK-NEXT: bfi w0, w9, #16, #8
    445 ; CHECK-NEXT: ret
    446 
    447   %tmp = bitcast i32* %arg to i8*
    448   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
    449   %tmp2 = load i8, i8* %tmp1, align 2 ; p[0]
    450   %tmp3 = zext i8 %tmp2 to i32
    451   %tmp30 = shl nuw nsw i32 %tmp3, 8
    452   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
    453   %tmp5 = load i8, i8* %tmp4, align 1 ; p[1]
    454   %tmp6 = zext i8 %tmp5 to i32
    455   %tmp7 = shl nuw nsw i32 %tmp6, 16
    456   %tmp8 = or i32 %tmp7, %tmp30
    457   ret i32 %tmp8
    458 }
    459 
    460 ; i8* p; // p is 2 byte aligned; the two-byte value is placed 16 bits up, and the expected output keeps two ldrb plus lsl/bfi
    461 ; ((i32) p[0] << 16) | ((i32) p[1] << 24)
    462 define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) {
    463 ; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
    464 ; CHECK: ldrb  w8, [x0]
    465 ; CHECK-NEXT: ldrb  w9, [x0, #1]
    466 ; CHECK-NEXT: lsl w0, w8, #16
    467 ; CHECK-NEXT: bfi w0, w9, #24, #8
    468 ; CHECK-NEXT: ret
    469 
    470   %tmp = bitcast i32* %arg to i8*
    471   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
    472   %tmp2 = load i8, i8* %tmp1, align 2 ; p[0]
    473   %tmp3 = zext i8 %tmp2 to i32
    474   %tmp30 = shl nuw nsw i32 %tmp3, 16
    475   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
    476   %tmp5 = load i8, i8* %tmp4, align 1 ; p[1]
    477   %tmp6 = zext i8 %tmp5 to i32
    478   %tmp7 = shl nuw i32 %tmp6, 24 ; no nsw: a byte >= 0x80 shifted by 24 sets the sign bit
    479   %tmp8 = or i32 %tmp7, %tmp30
    480   ret i32 %tmp8
    481 }
    482 ; i8* p; // p is 2 byte aligned; two bytes combined big-endian, and the expected output keeps two ldrb joined by bfi
    483 ; (i32) p[1] | ((i32) p[0] << 8)
    484 define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
    485 ; CHECK-LABEL: zext_load_i32_by_i8_bswap:
    486 ; CHECK: ldrb  w8, [x0, #1]
    487 ; CHECK-NEXT: ldrb    w9, [x0]
    488 ; CHECK-NEXT: bfi w8, w9, #8, #8
    489 ; CHECK-NEXT: mov  w0, w8
    490 ; CHECK-NEXT: ret
    491 
    492   %tmp = bitcast i32* %arg to i8*
    493   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
    494   %tmp2 = load i8, i8* %tmp1, align 1 ; p[1]
    495   %tmp3 = zext i8 %tmp2 to i32
    496   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
    497   %tmp5 = load i8, i8* %tmp4, align 2 ; p[0]
    498   %tmp6 = zext i8 %tmp5 to i32
    499   %tmp7 = shl nuw nsw i32 %tmp6, 8
    500   %tmp8 = or i32 %tmp7, %tmp3
    501   ret i32 %tmp8
    502 }
    503 
    504 ; i8* p; // p is 2 byte aligned; big-endian two-byte value placed 8 bits up, and the expected output keeps two ldrb plus lsl/bfi
    505 ; ((i32) p[1] << 8) | ((i32) p[0] << 16)
    506 define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) {
    507 ; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
    508 ; CHECK: ldrb  w8, [x0, #1]
    509 ; CHECK-NEXT: ldrb    w9, [x0]
    510 ; CHECK-NEXT: lsl w0, w8, #8
    511 ; CHECK-NEXT: bfi w0, w9, #16, #8
    512 ; CHECK-NEXT: ret
    513 
    514   %tmp = bitcast i32* %arg to i8*
    515   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
    516   %tmp2 = load i8, i8* %tmp1, align 1 ; p[1]
    517   %tmp3 = zext i8 %tmp2 to i32
    518   %tmp30 = shl nuw nsw i32 %tmp3, 8
    519   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
    520   %tmp5 = load i8, i8* %tmp4, align 2 ; p[0]
    521   %tmp6 = zext i8 %tmp5 to i32
    522   %tmp7 = shl nuw nsw i32 %tmp6, 16
    523   %tmp8 = or i32 %tmp7, %tmp30
    524   ret i32 %tmp8
    525 }
    526 
    527 ; i8* p; // p is 2 byte aligned; big-endian two-byte value placed 16 bits up, and the expected output keeps two ldrb plus lsl/bfi
    528 ; ((i32) p[1] << 16) | ((i32) p[0] << 24)
    529 define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) {
    530 ; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
    531 ; CHECK: ldrb  w8, [x0, #1]
    532 ; CHECK-NEXT: ldrb    w9, [x0]
    533 ; CHECK-NEXT: lsl w0, w8, #16
    534 ; CHECK-NEXT: bfi w0, w9, #24, #8
    535 ; CHECK-NEXT: ret
    536 
    537   %tmp = bitcast i32* %arg to i8*
    538   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
    539   %tmp2 = load i8, i8* %tmp1, align 1 ; p[1]
    540   %tmp3 = zext i8 %tmp2 to i32
    541   %tmp30 = shl nuw nsw i32 %tmp3, 16
    542   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
    543   %tmp5 = load i8, i8* %tmp4, align 2 ; p[0]
    544   %tmp6 = zext i8 %tmp5 to i32
    545   %tmp7 = shl nuw i32 %tmp6, 24 ; no nsw: a byte >= 0x80 shifted by 24 sets the sign bit
    546   %tmp8 = or i32 %tmp7, %tmp30
    547   ret i32 %tmp8
    548 }
    549