Home | History | Annotate | Download | only in ARM
      1 ; RUN: llc < %s -mtriple=armeb-unknown | FileCheck %s
      2 ; RUN: llc < %s -mtriple=armv6eb-unknown | FileCheck %s --check-prefix=CHECK-ARMv6
      3 
      4 ; i8* p; // p is 4 byte aligned
      5 ; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
        ; Bytes 0..3 are zero-extended and OR-ed together with p[0] in the most
        ; significant position. Both run configurations expect the four byte loads
        ; to collapse into one word load followed directly by the return.
      6 define i32 @load_i32_by_i8_big_endian(i32* %arg) {
      7 ; CHECK-LABEL: load_i32_by_i8_big_endian:
      8 ; CHECK: ldr r0, [r0]
      9 ; CHECK-NEXT: mov pc, lr
     10 
     11 ; CHECK-ARMv6-LABEL: load_i32_by_i8_big_endian:
     12 ; CHECK-ARMv6: ldr r0, [r0]
     13 ; CHECK-ARMv6-NEXT: bx  lr
     14   %tmp = bitcast i32* %arg to i8*
          ; Only this first byte load carries the 4-byte alignment; the others are align 1.
     15   %tmp1 = load i8, i8* %tmp, align 4
     16   %tmp2 = zext i8 %tmp1 to i32
          ; NOTE(review): nsw on a shift by 24 of a zero-extended byte looks like it
          ; yields poison when the byte has its top bit set - confirm this is intended.
     17   %tmp3 = shl nuw nsw i32 %tmp2, 24
     18   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
     19   %tmp5 = load i8, i8* %tmp4, align 1
     20   %tmp6 = zext i8 %tmp5 to i32
     21   %tmp7 = shl nuw nsw i32 %tmp6, 16
     22   %tmp8 = or i32 %tmp7, %tmp3
     23   %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
     24   %tmp10 = load i8, i8* %tmp9, align 1
     25   %tmp11 = zext i8 %tmp10 to i32
     26   %tmp12 = shl nuw nsw i32 %tmp11, 8
     27   %tmp13 = or i32 %tmp8, %tmp12
     28   %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
     29   %tmp15 = load i8, i8* %tmp14, align 1
     30   %tmp16 = zext i8 %tmp15 to i32
     31   %tmp17 = or i32 %tmp13, %tmp16
     32   ret i32 %tmp17
     33 }
     34 
     35 ; i8* p; // p is 4 byte aligned
     36 ; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
        ; Bytes 0..3 are combined with p[0] in the least significant position, i.e.
        ; the reverse of the order used by load_i32_by_i8_big_endian. The expected
        ; output is one word load followed by a byte swap of the loaded value.
     37 define i32 @load_i32_by_i8_bswap(i32* %arg) {
     38 ; BSWAP is not supported by 32 bit target
        ; (That remark covers the default-prefix run, which expects the swap to be
        ; open-coded with and/orr; the ARMv6 run expects a single rev instead.)
     39 ; CHECK-LABEL: load_i32_by_i8_bswap:
     40 ; CHECK: ldr  r0, [r0]
     41 ; CHECK: and
     42 ; CHECK-NEXT: and
     43 ; CHECK-NEXT: orr
     44 ; CHECK-NEXT: orr
     45 ; CHECK-NEXT: orr
     46 ; CHECK-NEXT: mov pc, lr
     47 
     48 ; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap:
     49 ; CHECK-ARMv6: ldr  r0, [r0]
     50 ; CHECK-ARMv6-NEXT: rev  r0, r0
     51 ; CHECK-ARMv6-NEXT: bx lr
     52   %tmp = bitcast i32* %arg to i8*
          ; Explicit zero-offset GEP; the load through it carries the 4-byte alignment.
     53   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
     54   %tmp2 = load i8, i8* %tmp1, align 4
     55   %tmp3 = zext i8 %tmp2 to i32
     56   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
     57   %tmp5 = load i8, i8* %tmp4, align 1
     58   %tmp6 = zext i8 %tmp5 to i32
     59   %tmp7 = shl nuw nsw i32 %tmp6, 8
     60   %tmp8 = or i32 %tmp7, %tmp3
     61   %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
     62   %tmp10 = load i8, i8* %tmp9, align 1
     63   %tmp11 = zext i8 %tmp10 to i32
     64   %tmp12 = shl nuw nsw i32 %tmp11, 16
     65   %tmp13 = or i32 %tmp8, %tmp12
     66   %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
     67   %tmp15 = load i8, i8* %tmp14, align 1
     68   %tmp16 = zext i8 %tmp15 to i32
     69   %tmp17 = shl nuw nsw i32 %tmp16, 24
     70   %tmp18 = or i32 %tmp13, %tmp17
     71   ret i32 %tmp18
     72 }
     73 
     74 ; i8* p; // p is 4 byte aligned
     75 ; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[2] << 8) | (i16) p[3])
        ; Two i16 halves are first assembled from byte pairs (p[0],p[1]) and
        ; (p[2],p[3]), then widened and merged into an i32 with the first half on
        ; top. Both run configurations expect a single word load.
     76 define i32 @load_i32_by_i16_by_i8_big_endian(i32* %arg) {
     77 ; CHECK-LABEL: load_i32_by_i16_by_i8_big_endian:
     78 ; CHECK: ldr r0, [r0]
     79 ; CHECK-NEXT: mov pc, lr
     80 
     81 ; CHECK-ARMv6-LABEL: load_i32_by_i16_by_i8_big_endian:
     82 ; CHECK-ARMv6: ldr r0, [r0]
     83 ; CHECK-ARMv6-NEXT: bx  lr
     84   %tmp = bitcast i32* %arg to i8*
          ; High half: (p[0] << 8) | p[1], computed in i16.
     85   %tmp1 = load i8, i8* %tmp, align 4
     86   %tmp2 = zext i8 %tmp1 to i16
     87   %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
     88   %tmp4 = load i8, i8* %tmp3, align 1
     89   %tmp5 = zext i8 %tmp4 to i16
     90   %tmp6 = shl nuw nsw i16 %tmp2, 8
     91   %tmp7 = or i16 %tmp6, %tmp5
          ; Low half: (p[2] << 8) | p[3], computed in i16.
     92   %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
     93   %tmp9 = load i8, i8* %tmp8, align 1
     94   %tmp10 = zext i8 %tmp9 to i16
     95   %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
     96   %tmp12 = load i8, i8* %tmp11, align 1
     97   %tmp13 = zext i8 %tmp12 to i16
     98   %tmp14 = shl nuw nsw i16 %tmp10, 8
     99   %tmp15 = or i16 %tmp14, %tmp13
          ; Widen both halves and place the first one in the upper 16 bits.
    100   %tmp16 = zext i16 %tmp7 to i32
    101   %tmp17 = zext i16 %tmp15 to i32
    102   %tmp18 = shl nuw nsw i32 %tmp16, 16
    103   %tmp19 = or i32 %tmp18, %tmp17
    104   ret i32 %tmp19
    105 }
    106 
    107 ; i16* p; // p is 4 byte aligned
    108 ; ((i32) p[0] << 16) | (i32) p[1]
        ; Two adjacent i16 loads are zero-extended and merged with p[0] in the upper
        ; half. Both run configurations expect a single word load.
    109 define i32 @load_i32_by_i16(i32* %arg) {
    110 ; CHECK-LABEL: load_i32_by_i16:
    111 ; CHECK: ldr r0, [r0]
    112 ; CHECK-NEXT: mov pc, lr
    113 
    114 ; CHECK-ARMv6-LABEL: load_i32_by_i16:
    115 ; CHECK-ARMv6: ldr r0, [r0]
    116 ; CHECK-ARMv6-NEXT: bx  lr
    117   %tmp = bitcast i32* %arg to i16*
    118   %tmp1 = load i16, i16* %tmp, align 4
    119   %tmp2 = zext i16 %tmp1 to i32
          ; Element index 1 of an i16 pointer, i.e. byte offset 2; this load is align 1.
    120   %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
    121   %tmp4 = load i16, i16* %tmp3, align 1
    122   %tmp5 = zext i16 %tmp4 to i32
    123   %tmp6 = shl nuw nsw i32 %tmp2, 16
    124   %tmp7 = or i32 %tmp6, %tmp5
    125   ret i32 %tmp7
    126 }
    127 
    128 ; i16* p_16; // p_16 is 4 byte aligned
    129 ; i8* p_8 = (i8*) p_16;
    130 ; ((i32) p_16[0] << 16) | ((i32) p_8[2] << 8) | (i32) p_8[3]
        ; Mixed-width variant: the upper half comes from one i16 load, the lower half
        ; from two separate byte loads at offsets 2 and 3. Both run configurations
        ; expect a single word load.
    131 define i32 @load_i32_by_i16_i8(i32* %arg) {
    132 ; CHECK-LABEL: load_i32_by_i16_i8:
    133 ; CHECK: ldr r0, [r0]
    134 ; CHECK-NEXT: mov pc, lr
    135 
    136 ; CHECK-ARMv6-LABEL: load_i32_by_i16_i8:
    137 ; CHECK-ARMv6: ldr r0, [r0]
    138 ; CHECK-ARMv6-NEXT: bx  lr
          ; Two views of the same address: %tmp as i16*, %tmp1 as i8*.
    139   %tmp = bitcast i32* %arg to i16*
    140   %tmp1 = bitcast i32* %arg to i8*
    141   %tmp2 = load i16, i16* %tmp, align 4
    142   %tmp3 = zext i16 %tmp2 to i32
    143   %tmp4 = shl nuw nsw i32 %tmp3, 16
    144   %tmp5 = getelementptr inbounds i8, i8* %tmp1, i32 2
    145   %tmp6 = load i8, i8* %tmp5, align 1
    146   %tmp7 = zext i8 %tmp6 to i32
    147   %tmp8 = shl nuw nsw i32 %tmp7, 8
    148   %tmp9 = getelementptr inbounds i8, i8* %tmp1, i32 3
    149   %tmp10 = load i8, i8* %tmp9, align 1
    150   %tmp11 = zext i8 %tmp10 to i32
          ; The low two bytes are merged first, then OR-ed with the shifted i16 part.
    151   %tmp12 = or i32 %tmp8, %tmp11
    152   %tmp13 = or i32 %tmp12, %tmp4
    153   ret i32 %tmp13
    154 }
    155 
    156 ; i8* p; // p is 8 byte aligned
    157 ; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
        ; 64-bit version of the byte-swapped pattern: bytes 0..7 are combined with
        ; p[0] in the least significant position. The default-prefix run expects two
        ; word loads plus an open-coded and/orr swap; the ARMv6 run expects one ldrd
        ; followed by a rev of each half, with the two halves exchanged.
    158 define i64 @load_i64_by_i8_bswap(i64* %arg) {
    159 ; CHECK-LABEL: load_i64_by_i8_bswap:
    160 ; CHECK: ldr{{.*}}r0
    161 ; CHECK: ldr{{.*}}r0
    162 ; CHECK: and
    163 ; CHECK-NEXT: and
    164 ; CHECK-NEXT: orr
    165 ; CHECK-NEXT: orr
    166 ; CHECK-NEXT: and
    167 ; CHECK-NEXT: orr
    168 ; CHECK-NEXT: and
    169 ; CHECK-NEXT: orr
    170 ; CHECK-NEXT: orr
    171 ; CHECK-NEXT: orr
    172 ; CHECK: mov pc, lr
    173 
    174 ; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
    175 ; CHECK-ARMv6: ldrd  r2, r3, [r0]
    176 ; CHECK-ARMv6: rev r0, r3
    177 ; CHECK-ARMv6: rev r1, r2
    178 ; CHECK-ARMv6: bx  lr
    179   %tmp = bitcast i64* %arg to i8*
          ; Only this first byte load carries the 8-byte alignment.
    180   %tmp1 = load i8, i8* %tmp, align 8
    181   %tmp2 = zext i8 %tmp1 to i64
    182   %tmp3 = getelementptr inbounds i8, i8* %tmp, i64 1
    183   %tmp4 = load i8, i8* %tmp3, align 1
    184   %tmp5 = zext i8 %tmp4 to i64
    185   %tmp6 = shl nuw nsw i64 %tmp5, 8
    186   %tmp7 = or i64 %tmp6, %tmp2
    187   %tmp8 = getelementptr inbounds i8, i8* %tmp, i64 2
    188   %tmp9 = load i8, i8* %tmp8, align 1
    189   %tmp10 = zext i8 %tmp9 to i64
    190   %tmp11 = shl nuw nsw i64 %tmp10, 16
    191   %tmp12 = or i64 %tmp7, %tmp11
    192   %tmp13 = getelementptr inbounds i8, i8* %tmp, i64 3
    193   %tmp14 = load i8, i8* %tmp13, align 1
    194   %tmp15 = zext i8 %tmp14 to i64
    195   %tmp16 = shl nuw nsw i64 %tmp15, 24
    196   %tmp17 = or i64 %tmp12, %tmp16
    197   %tmp18 = getelementptr inbounds i8, i8* %tmp, i64 4
    198   %tmp19 = load i8, i8* %tmp18, align 1
    199   %tmp20 = zext i8 %tmp19 to i64
    200   %tmp21 = shl nuw nsw i64 %tmp20, 32
    201   %tmp22 = or i64 %tmp17, %tmp21
    202   %tmp23 = getelementptr inbounds i8, i8* %tmp, i64 5
    203   %tmp24 = load i8, i8* %tmp23, align 1
    204   %tmp25 = zext i8 %tmp24 to i64
    205   %tmp26 = shl nuw nsw i64 %tmp25, 40
    206   %tmp27 = or i64 %tmp22, %tmp26
    207   %tmp28 = getelementptr inbounds i8, i8* %tmp, i64 6
    208   %tmp29 = load i8, i8* %tmp28, align 1
    209   %tmp30 = zext i8 %tmp29 to i64
    210   %tmp31 = shl nuw nsw i64 %tmp30, 48
    211   %tmp32 = or i64 %tmp27, %tmp31
    212   %tmp33 = getelementptr inbounds i8, i8* %tmp, i64 7
    213   %tmp34 = load i8, i8* %tmp33, align 1
    214   %tmp35 = zext i8 %tmp34 to i64
          ; This shift is nuw only: the byte lands in bits 56..63, which include the sign bit.
    215   %tmp36 = shl nuw i64 %tmp35, 56
    216   %tmp37 = or i64 %tmp32, %tmp36
    217   ret i64 %tmp37
    218 }
    219 
    220 ; i8* p; // p is 8 byte aligned
    221 ; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
        ; 64-bit version with p[0] in the most significant position. No swap is
        ; expected: the default-prefix run expects two word loads and a register
        ; move, the ARMv6 run expects a single ldrd into r0/r1.
    222 define i64 @load_i64_by_i8(i64* %arg) {
    223 ; CHECK-LABEL: load_i64_by_i8:
    224 ; CHECK: ldr r2, [r0]
    225 ; CHECK: ldr r1, [r0, #4]
    226 ; CHECK: mov r0, r2
    227 ; CHECK: mov pc, lr
    228 
    229 ; CHECK-ARMv6-LABEL: load_i64_by_i8:
    230 ; CHECK-ARMv6: ldrd  r0, r1, [r0]
    231 ; CHECK-ARMv6: bx  lr
    232   %tmp = bitcast i64* %arg to i8*
    233   %tmp1 = load i8, i8* %tmp, align 8
    234   %tmp2 = zext i8 %tmp1 to i64
          ; This shift is nuw only: the byte lands in bits 56..63, which include the sign bit.
    235   %tmp3 = shl nuw i64 %tmp2, 56
    236   %tmp4 = getelementptr inbounds i8, i8* %tmp, i64 1
    237   %tmp5 = load i8, i8* %tmp4, align 1
    238   %tmp6 = zext i8 %tmp5 to i64
    239   %tmp7 = shl nuw nsw i64 %tmp6, 48
    240   %tmp8 = or i64 %tmp7, %tmp3
    241   %tmp9 = getelementptr inbounds i8, i8* %tmp, i64 2
    242   %tmp10 = load i8, i8* %tmp9, align 1
    243   %tmp11 = zext i8 %tmp10 to i64
    244   %tmp12 = shl nuw nsw i64 %tmp11, 40
    245   %tmp13 = or i64 %tmp8, %tmp12
    246   %tmp14 = getelementptr inbounds i8, i8* %tmp, i64 3
    247   %tmp15 = load i8, i8* %tmp14, align 1
    248   %tmp16 = zext i8 %tmp15 to i64
    249   %tmp17 = shl nuw nsw i64 %tmp16, 32
    250   %tmp18 = or i64 %tmp13, %tmp17
    251   %tmp19 = getelementptr inbounds i8, i8* %tmp, i64 4
    252   %tmp20 = load i8, i8* %tmp19, align 1
    253   %tmp21 = zext i8 %tmp20 to i64
    254   %tmp22 = shl nuw nsw i64 %tmp21, 24
    255   %tmp23 = or i64 %tmp18, %tmp22
    256   %tmp24 = getelementptr inbounds i8, i8* %tmp, i64 5
    257   %tmp25 = load i8, i8* %tmp24, align 1
    258   %tmp26 = zext i8 %tmp25 to i64
    259   %tmp27 = shl nuw nsw i64 %tmp26, 16
    260   %tmp28 = or i64 %tmp23, %tmp27
    261   %tmp29 = getelementptr inbounds i8, i8* %tmp, i64 6
    262   %tmp30 = load i8, i8* %tmp29, align 1
    263   %tmp31 = zext i8 %tmp30 to i64
    264   %tmp32 = shl nuw nsw i64 %tmp31, 8
    265   %tmp33 = or i64 %tmp28, %tmp32
    266   %tmp34 = getelementptr inbounds i8, i8* %tmp, i64 7
    267   %tmp35 = load i8, i8* %tmp34, align 1
    268   %tmp36 = zext i8 %tmp35 to i64
    269   %tmp37 = or i64 %tmp33, %tmp36
    270   ret i64 %tmp37
    271 }
    272 
    273 ; i8* p; // p[1] is 4 byte aligned
    274 ; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
        ; Same byte order as load_i32_by_i8_bswap, but the four bytes start at byte
        ; offset 1 from the argument. The expected output is one word load at #1
        ; followed by a byte swap (open-coded for the default prefix, rev for ARMv6).
    275 define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
    276 ; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
    277 ; CHECK: ldr r0, [r0, #1]
    278 ; CHECK-NEXT: mov r1, #65280
    279 ; CHECK-NEXT: mov r2, #16711680
    280 ; CHECK-NEXT: and r1, r1, r0, lsr #8
    281 ; CHECK-NEXT: and r2, r2, r0, lsl #8
    282 ; CHECK-NEXT: orr r1, r1, r0, lsr #24
    283 ; CHECK-NEXT: orr r0, r2, r0, lsl #24
    284 ; CHECK-NEXT: orr r0, r0, r1
    285 ; CHECK-NEXT: mov pc, lr
    286 
    287 ; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset:
    288 ; CHECK-ARMv6: ldr r0, [r0, #1]
    289 ; CHECK-ARMv6-NEXT: rev r0, r0
    290 ; CHECK-ARMv6-NEXT: bx  lr
    291 
    292   %tmp = bitcast i32* %arg to i8*
          ; The lowest-addressed byte (offset 1) is the one marked align 4.
    293   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
    294   %tmp2 = load i8, i8* %tmp1, align 4
    295   %tmp3 = zext i8 %tmp2 to i32
    296   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
    297   %tmp5 = load i8, i8* %tmp4, align 1
    298   %tmp6 = zext i8 %tmp5 to i32
    299   %tmp7 = shl nuw nsw i32 %tmp6, 8
    300   %tmp8 = or i32 %tmp7, %tmp3
    301   %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
    302   %tmp10 = load i8, i8* %tmp9, align 1
    303   %tmp11 = zext i8 %tmp10 to i32
    304   %tmp12 = shl nuw nsw i32 %tmp11, 16
    305   %tmp13 = or i32 %tmp8, %tmp12
    306   %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
    307   %tmp15 = load i8, i8* %tmp14, align 1
    308   %tmp16 = zext i8 %tmp15 to i32
    309   %tmp17 = shl nuw nsw i32 %tmp16, 24
    310   %tmp18 = or i32 %tmp13, %tmp17
    311   ret i32 %tmp18
    312 }
    313 
    314 ; i8* p; // p[-4] is 4 byte aligned
    315 ; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
        ; Byte-swapped pattern over the four bytes immediately before the argument
        ; (offsets -4..-1). The expected output is one word load at #-4 followed by a
        ; byte swap (open-coded for the default prefix, rev for ARMv6).
    316 define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
    317 ; CHECK-LABEL: load_i32_by_i8_neg_offset:
    318 ; CHECK: ldr r0, [r0, #-4]
    319 ; CHECK-NEXT: mov r1, #65280
    320 ; CHECK-NEXT: mov r2, #16711680
    321 ; CHECK-NEXT: and r1, r1, r0, lsr #8
    322 ; CHECK-NEXT: and r2, r2, r0, lsl #8
    323 ; CHECK-NEXT: orr r1, r1, r0, lsr #24
    324 ; CHECK-NEXT: orr r0, r2, r0, lsl #24
    325 ; CHECK-NEXT: orr r0, r0, r1
    326 ; CHECK-NEXT: mov pc, lr
    327 
    328 ; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset:
    329 ; CHECK-ARMv6: ldr r0, [r0, #-4]
    330 ; CHECK-ARMv6-NEXT: rev r0, r0
    331 ; CHECK-ARMv6-NEXT: bx  lr
    332 
    333   %tmp = bitcast i32* %arg to i8*
          ; The lowest-addressed byte (offset -4) is the one marked align 4.
    334   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
    335   %tmp2 = load i8, i8* %tmp1, align 4
    336   %tmp3 = zext i8 %tmp2 to i32
    337   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
    338   %tmp5 = load i8, i8* %tmp4, align 1
    339   %tmp6 = zext i8 %tmp5 to i32
    340   %tmp7 = shl nuw nsw i32 %tmp6, 8
    341   %tmp8 = or i32 %tmp7, %tmp3
    342   %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
    343   %tmp10 = load i8, i8* %tmp9, align 1
    344   %tmp11 = zext i8 %tmp10 to i32
    345   %tmp12 = shl nuw nsw i32 %tmp11, 16
    346   %tmp13 = or i32 %tmp8, %tmp12
    347   %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
    348   %tmp15 = load i8, i8* %tmp14, align 1
    349   %tmp16 = zext i8 %tmp15 to i32
    350   %tmp17 = shl nuw nsw i32 %tmp16, 24
    351   %tmp18 = or i32 %tmp13, %tmp17
    352   ret i32 %tmp18
    353 }
    354 
    355 ; i8* p; // p[1] is 4 byte aligned
    356 ; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
        ; Bytes at offsets 1..4 with the lowest address (p[1]) in the most significant
        ; position. Both run configurations expect a plain word load at #1 with no
        ; swap afterwards.
    357 define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
    358 ; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
    359 ; CHECK: ldr r0, [r0, #1]
    360 ; CHECK-NEXT: mov pc, lr
    361 
    362 ; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
    363 ; CHECK-ARMv6: ldr r0, [r0, #1]
    364 ; CHECK-ARMv6-NEXT: bx  lr
    365 
    366   %tmp = bitcast i32* %arg to i8*
          ; Loads are issued from the highest offset down to the lowest.
    367   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
    368   %tmp2 = load i8, i8* %tmp1, align 1
    369   %tmp3 = zext i8 %tmp2 to i32
    370   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
    371   %tmp5 = load i8, i8* %tmp4, align 1
    372   %tmp6 = zext i8 %tmp5 to i32
    373   %tmp7 = shl nuw nsw i32 %tmp6, 8
    374   %tmp8 = or i32 %tmp7, %tmp3
    375   %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
    376   %tmp10 = load i8, i8* %tmp9, align 1
    377   %tmp11 = zext i8 %tmp10 to i32
    378   %tmp12 = shl nuw nsw i32 %tmp11, 16
    379   %tmp13 = or i32 %tmp8, %tmp12
          ; The lowest-addressed byte (offset 1) is the one marked align 4.
    380   %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
    381   %tmp15 = load i8, i8* %tmp14, align 4
    382   %tmp16 = zext i8 %tmp15 to i32
    383   %tmp17 = shl nuw nsw i32 %tmp16, 24
    384   %tmp18 = or i32 %tmp13, %tmp17
    385   ret i32 %tmp18
    386 }
    387 
    388 ; i8* p; // p[-4] is 4 byte aligned
    389 ; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
        ; Bytes at offsets -4..-1 with the lowest address (p[-4]) in the most
        ; significant position. Both run configurations expect a plain word load at
        ; #-4 with no swap afterwards.
    390 define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
    391 ; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
    392 ; CHECK: ldr r0, [r0, #-4]
    393 ; CHECK-NEXT: mov pc, lr
    394 
    395 ; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset_bswap:
    396 ; CHECK-ARMv6: ldr r0, [r0, #-4]
    397 ; CHECK-ARMv6-NEXT: bx  lr
    398 
    399   %tmp = bitcast i32* %arg to i8*
          ; Loads are issued from the highest offset (-1) down to the lowest (-4).
    400   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
    401   %tmp2 = load i8, i8* %tmp1, align 1
    402   %tmp3 = zext i8 %tmp2 to i32
    403   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
    404   %tmp5 = load i8, i8* %tmp4, align 1
    405   %tmp6 = zext i8 %tmp5 to i32
    406   %tmp7 = shl nuw nsw i32 %tmp6, 8
    407   %tmp8 = or i32 %tmp7, %tmp3
    408   %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
    409   %tmp10 = load i8, i8* %tmp9, align 1
    410   %tmp11 = zext i8 %tmp10 to i32
    411   %tmp12 = shl nuw nsw i32 %tmp11, 16
    412   %tmp13 = or i32 %tmp8, %tmp12
          ; The lowest-addressed byte (offset -4) is the one marked align 4.
    413   %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
    414   %tmp15 = load i8, i8* %tmp14, align 4
    415   %tmp16 = zext i8 %tmp15 to i32
    416   %tmp17 = shl nuw nsw i32 %tmp16, 24
    417   %tmp18 = or i32 %tmp13, %tmp17
    418   ret i32 %tmp18
    419 }
    420 
    421 declare i16 @llvm.bswap.i16(i16)
    422 
    423 ; i16* p; // p is 4 byte aligned
    424 ; (i32) bswap(p[0]) | ((i32) bswap(p[1]) << 16)
        ; Each i16 half is byte-swapped with llvm.bswap.i16 before widening; the
        ; swapped p[1] goes to the upper half. The expected output is one word load
        ; followed by a full 32-bit byte swap (open-coded for the default prefix,
        ; rev for ARMv6).
    425 define i32 @load_i32_by_bswap_i16(i32* %arg) {
    426 ; CHECK-LABEL: load_i32_by_bswap_i16:
    427 ; CHECK: ldr r0, [r0]
    428 ; CHECK-NEXT: mov r1, #65280
    429 ; CHECK-NEXT: mov r2, #16711680
    430 ; CHECK-NEXT: and r1, r1, r0, lsr #8
    431 ; CHECK-NEXT: and r2, r2, r0, lsl #8
    432 ; CHECK-NEXT: orr r1, r1, r0, lsr #24
    433 ; CHECK-NEXT: orr r0, r2, r0, lsl #24
    434 ; CHECK-NEXT: orr r0, r0, r1
    435 ; CHECK-NEXT: mov pc, lr
    436 
    437 ; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16:
    438 ; CHECK-ARMv6: ldr  r0, [r0]
    439 ; CHECK-ARMv6-NEXT: rev r0, r0
    440 ; CHECK-ARMv6-NEXT: bx  lr
    441 
    442   %tmp = bitcast i32* %arg to i16*
    443   %tmp1 = load i16, i16* %tmp, align 4
    444   %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
    445   %tmp2 = zext i16 %tmp11 to i32
    446   %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
    447   %tmp4 = load i16, i16* %tmp3, align 1
    448   %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
          ; The shift is applied after the swap and the zero-extension.
    449   %tmp5 = zext i16 %tmp41 to i32
    450   %tmp6 = shl nuw nsw i32 %tmp5, 16
    451   %tmp7 = or i32 %tmp6, %tmp2
    452   ret i32 %tmp7
    453 }
    454 
    455 ; i16* p; // p is 4 byte aligned
    456 ; (i32) p[1] | ((sext(p[0]) to i32) << 16)
        ; Like load_i32_by_i16, except the half that ends up in the upper 16 bits is
        ; sign-extended rather than zero-extended before the shift. Both run
        ; configurations still expect a single word load.
    457 define i32 @load_i32_by_sext_i16(i32* %arg) {
    458 ; CHECK-LABEL: load_i32_by_sext_i16:
    459 ; CHECK: ldr  r0, [r0]
    460 ; CHECK-NEXT: mov pc, lr
    461 ;
    462 ; CHECK-ARMv6-LABEL: load_i32_by_sext_i16:
    463 ; CHECK-ARMv6: ldr r0, [r0]
    464 ; CHECK-ARMv6-NEXT: bx  lr
    465   %tmp = bitcast i32* %arg to i16*
    466   %tmp1 = load i16, i16* %tmp, align 4
    467   %tmp2 = sext i16 %tmp1 to i32
    468   %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
    469   %tmp4 = load i16, i16* %tmp3, align 1
    470   %tmp5 = zext i16 %tmp4 to i32
          ; Shifting left by 16 pushes the 16 sign-extension bits out of the i32.
    471   %tmp6 = shl nuw nsw i32 %tmp2, 16
    472   %tmp7 = or i32 %tmp6, %tmp5
    473   ret i32 %tmp7
    474 }
    475 
    476 ; i8* arg; i32 i;
    477 ; p = arg + 12;
    478 ; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
        ; Byte-swapped pattern where every address is base (arg + 12) plus a variable
        ; index. The expected output adds the index to the base register once, does
        ; one word load at #12, then swaps the bytes (open-coded for the default
        ; prefix, rev for ARMv6).
    479 define i32 @load_i32_by_i8_base_offset_index(i8* %arg, i32 %i) {
    480 ; CHECK-LABEL: load_i32_by_i8_base_offset_index:
    481 ; CHECK: add r0, r0, r1
    482 ; CHECK-NEXT: mov r1, #65280
    483 ; CHECK-NEXT: mov r2, #16711680
    484 ; CHECK-NEXT: ldr r0, [r0, #12]
    485 ; CHECK-NEXT: and r1, r1, r0, lsr #8
    486 ; CHECK-NEXT: and r2, r2, r0, lsl #8
    487 ; CHECK-NEXT: orr r1, r1, r0, lsr #24
    488 ; CHECK-NEXT: orr r0, r2, r0, lsl #24
    489 ; CHECK-NEXT: orr r0, r0, r1
    490 ; CHECK-NEXT: mov pc, lr
    491 ;
    492 ; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index:
    493 ; CHECK-ARMv6: add r0, r0, r1
    494 ; CHECK-ARMv6-NEXT: ldr r0, [r0, #12]
    495 ; CHECK-ARMv6-NEXT: rev r0, r0
    496 ; CHECK-ARMv6-NEXT: bx  lr
          ; Indices i+3, i+2 and i+1 are computed in i32 up front and zero-extended to
          ; i64 where each is used.
    497   %tmp = add nuw nsw i32 %i, 3
    498   %tmp2 = add nuw nsw i32 %i, 2
    499   %tmp3 = add nuw nsw i32 %i, 1
          ; %tmp4 is the constant-offset base (arg + 12) shared by all four loads.
    500   %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
    501   %tmp5 = zext i32 %i to i64
    502   %tmp6 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp5
    503   %tmp7 = load i8, i8* %tmp6, align 4
    504   %tmp8 = zext i8 %tmp7 to i32
    505   %tmp9 = zext i32 %tmp3 to i64
    506   %tmp10 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp9
    507   %tmp11 = load i8, i8* %tmp10, align 1
    508   %tmp12 = zext i8 %tmp11 to i32
    509   %tmp13 = shl nuw nsw i32 %tmp12, 8
    510   %tmp14 = or i32 %tmp13, %tmp8
    511   %tmp15 = zext i32 %tmp2 to i64
    512   %tmp16 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp15
    513   %tmp17 = load i8, i8* %tmp16, align 1
    514   %tmp18 = zext i8 %tmp17 to i32
    515   %tmp19 = shl nuw nsw i32 %tmp18, 16
    516   %tmp20 = or i32 %tmp14, %tmp19
    517   %tmp21 = zext i32 %tmp to i64
    518   %tmp22 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp21
    519   %tmp23 = load i8, i8* %tmp22, align 1
    520   %tmp24 = zext i8 %tmp23 to i32
    521   %tmp25 = shl nuw i32 %tmp24, 24
    522   %tmp26 = or i32 %tmp20, %tmp25
    523   ret i32 %tmp26
    524 }
    525 
    526 ; i8* arg; i32 i;
    527 ; p = arg + 12;
    528 ; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
        ; Variant of load_i32_by_i8_base_offset_index in which the lowest byte is at
        ; index i + 1 instead of i. The expected word load uses offset #13, i.e. the
        ; extra 1 is expected to be folded into the constant part of the address.
    529 define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
    530 ; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
    531 ; CHECK: add r0, r0, r1
    532 ; CHECK-NEXT: mov r1, #65280
    533 ; CHECK-NEXT: mov r2, #16711680
    534 ; CHECK-NEXT: ldr r0, [r0, #13]
    535 ; CHECK-NEXT: and r1, r1, r0, lsr #8
    536 ; CHECK-NEXT: and r2, r2, r0, lsl #8
    537 ; CHECK-NEXT: orr r1, r1, r0, lsr #24
    538 ; CHECK-NEXT: orr r0, r2, r0, lsl #24
    539 ; CHECK-NEXT: orr r0, r0, r1
    540 ; CHECK-NEXT: mov pc, lr
    541 ;
    542 ; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2:
    543 ; CHECK-ARMv6: add r0, r0, r1
    544 ; CHECK-ARMv6-NEXT: ldr r0, [r0, #13]
    545 ; CHECK-ARMv6-NEXT: rev r0, r0
    546 ; CHECK-ARMv6-NEXT: bx  lr
    547 
          ; Indices i+4, i+3, i+2 and (below) i+1 are computed in i32 and zero-extended
          ; to i64 where each is used.
    548   %tmp = add nuw nsw i32 %i, 4
    549   %tmp2 = add nuw nsw i32 %i, 3
    550   %tmp3 = add nuw nsw i32 %i, 2
          ; %tmp4 is the constant-offset base (arg + 12) shared by all four loads.
    551   %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
    552   %tmp5 = add nuw nsw i32 %i, 1
    553   %tmp27 = zext i32 %tmp5 to i64
    554   %tmp28 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp27
    555   %tmp29 = load i8, i8* %tmp28, align 4
    556   %tmp30 = zext i8 %tmp29 to i32
    557   %tmp31 = zext i32 %tmp3 to i64
    558   %tmp32 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp31
    559   %tmp33 = load i8, i8* %tmp32, align 1
    560   %tmp34 = zext i8 %tmp33 to i32
    561   %tmp35 = shl nuw nsw i32 %tmp34, 8
    562   %tmp36 = or i32 %tmp35, %tmp30
    563   %tmp37 = zext i32 %tmp2 to i64
    564   %tmp38 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp37
    565   %tmp39 = load i8, i8* %tmp38, align 1
    566   %tmp40 = zext i8 %tmp39 to i32
    567   %tmp41 = shl nuw nsw i32 %tmp40, 16
    568   %tmp42 = or i32 %tmp36, %tmp41
    569   %tmp43 = zext i32 %tmp to i64
    570   %tmp44 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp43
    571   %tmp45 = load i8, i8* %tmp44, align 1
    572   %tmp46 = zext i8 %tmp45 to i32
    573   %tmp47 = shl nuw i32 %tmp46, 24
    574   %tmp48 = or i32 %tmp42, %tmp47
    575   ret i32 %tmp48
    576 }
    577 
    578 ; i8* p; // p is 2 byte aligned
    579 ; (i32) p[0] | ((i32) p[1] << 8)
        ; Only two bytes feed the i32 result, with p[0] in the low byte. The expected
        ; output for both run configurations keeps two separate byte loads combined
        ; with a shifted orr, i.e. no single wider load is expected here.
    580 define i32 @zext_load_i32_by_i8(i32* %arg) {
    581 ; CHECK-LABEL: zext_load_i32_by_i8:
    582 ; CHECK: ldrb  r1, [r0]
    583 ; CHECK-NEXT: ldrb  r0, [r0, #1]
    584 ; CHECK-NEXT: orr r0, r1, r0, lsl #8
    585 ; CHECK-NEXT: mov pc, lr
    586 ;
    587 ; CHECK-ARMv6-LABEL: zext_load_i32_by_i8:
    588 ; CHECK-ARMv6: ldrb  r1, [r0]
    589 ; CHECK-ARMv6-NEXT: ldrb  r0, [r0, #1]
    590 ; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #8
    591 ; CHECK-ARMv6-NEXT: bx  lr
    592 
    593   %tmp = bitcast i32* %arg to i8*
    594   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
    595   %tmp2 = load i8, i8* %tmp1, align 2
    596   %tmp3 = zext i8 %tmp2 to i32
    597   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
    598   %tmp5 = load i8, i8* %tmp4, align 1
    599   %tmp6 = zext i8 %tmp5 to i32
    600   %tmp7 = shl nuw nsw i32 %tmp6, 8
    601   %tmp8 = or i32 %tmp7, %tmp3
    602   ret i32 %tmp8
    603 }
    604 
    605 ; i8* p; // p is 2 byte aligned
    606 ; ((i32) p[0] << 8) | ((i32) p[1] << 16)
        ; Same two bytes as zext_load_i32_by_i8, with the whole value shifted up by
        ; 8 bits. The expected output keeps two byte loads, a shift of p[1] by 16 and
        ; an orr that shifts p[0] by 8.
    607 define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) {
    608 ; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
    609 ; CHECK: ldrb  r1, [r0]
    610 ; CHECK-NEXT: ldrb  r0, [r0, #1]
    611 ; CHECK-NEXT: lsl r0, r0, #16
    612 ; CHECK-NEXT: orr r0, r0, r1, lsl #8
    613 ; CHECK-NEXT: mov pc, lr
    614 ;
    615 ; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_shl_8:
    616 ; CHECK-ARMv6: ldrb  r1, [r0]
    617 ; CHECK-ARMv6-NEXT: ldrb  r0, [r0, #1]
    618 ; CHECK-ARMv6-NEXT: lsl r0, r0, #16
    619 ; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #8
    620 ; CHECK-ARMv6-NEXT: bx  lr
    621 
    622   %tmp = bitcast i32* %arg to i8*
    623   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
    624   %tmp2 = load i8, i8* %tmp1, align 2
    625   %tmp3 = zext i8 %tmp2 to i32
    626   %tmp30 = shl nuw nsw i32 %tmp3, 8
    627   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
    628   %tmp5 = load i8, i8* %tmp4, align 1
    629   %tmp6 = zext i8 %tmp5 to i32
    630   %tmp7 = shl nuw nsw i32 %tmp6, 16
    631   %tmp8 = or i32 %tmp7, %tmp30
    632   ret i32 %tmp8
    633 }
    634 
    635 ; i8* p; // p is 2 byte aligned
    636 ; ((i32) p[0] << 16) | ((i32) p[1] << 24)
        ; Same two bytes as zext_load_i32_by_i8, with the whole value shifted up by
        ; 16 bits so the pair occupies the upper half of the i32. The expected output
        ; keeps two byte loads, a shift of p[1] by 24 and an orr that shifts p[0] by 16.
    637 define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) {
    638 ; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
    639 ; CHECK: ldrb  r1, [r0]
    640 ; CHECK-NEXT: ldrb  r0, [r0, #1]
    641 ; CHECK-NEXT: lsl r0, r0, #24
    642 ; CHECK-NEXT: orr r0, r0, r1, lsl #16
    643 ; CHECK-NEXT: mov pc, lr
    644 ;
    645 ; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_shl_16:
    646 ; CHECK-ARMv6: ldrb  r1, [r0]
    647 ; CHECK-ARMv6-NEXT: ldrb  r0, [r0, #1]
    648 ; CHECK-ARMv6-NEXT: lsl r0, r0, #24
    649 ; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #16
    650 ; CHECK-ARMv6-NEXT: bx  lr
    651 
    652   %tmp = bitcast i32* %arg to i8*
    653   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
    654   %tmp2 = load i8, i8* %tmp1, align 2
    655   %tmp3 = zext i8 %tmp2 to i32
    656   %tmp30 = shl nuw nsw i32 %tmp3, 16
    657   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
    658   %tmp5 = load i8, i8* %tmp4, align 1
    659   %tmp6 = zext i8 %tmp5 to i32
    660   %tmp7 = shl nuw nsw i32 %tmp6, 24
    661   %tmp8 = or i32 %tmp7, %tmp30
    662   ret i32 %tmp8
    663 }
    664 
    665 ; i8* p; // p is 2 byte aligned
    666 ; (i32) p[1] | ((i32) p[0] << 8)
        ; Two-byte pattern with the bytes in the opposite order to
        ; zext_load_i32_by_i8: p[0] goes to bits 8..15 and p[1] to the low byte. The
        ; expected output for both run configurations keeps two separate byte loads
        ; combined with a shifted orr.
    667 define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
    668 ; CHECK-LABEL: zext_load_i32_by_i8_bswap:
    669 ; CHECK: ldrb  r1, [r0]
    670 ; CHECK-NEXT: ldrb  r0, [r0, #1]
    671 ; CHECK-NEXT: orr r0, r0, r1, lsl #8
    672 ; CHECK-NEXT: mov pc, lr
    673 ;
    674 ; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap:
    675 ; CHECK-ARMv6: ldrb  r1, [r0]
    676 ; CHECK-ARMv6-NEXT: ldrb  r0, [r0, #1]
    677 ; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #8
    678 ; CHECK-ARMv6-NEXT: bx  lr
    679 
    680   %tmp = bitcast i32* %arg to i8*
    681   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
    682   %tmp2 = load i8, i8* %tmp1, align 1
    683   %tmp3 = zext i8 %tmp2 to i32
    684   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
    685   %tmp5 = load i8, i8* %tmp4, align 2
    686   %tmp6 = zext i8 %tmp5 to i32
    687   %tmp7 = shl nuw nsw i32 %tmp6, 8
    688   %tmp8 = or i32 %tmp7, %tmp3
    689   ret i32 %tmp8
    690 }
    691 
    692 ; i8* p; // p is 2 byte aligned
    693 ; ((i32) p[1] << 8) | ((i32) p[0] << 16)
        ; Same byte order as zext_load_i32_by_i8_bswap, with the whole value shifted
        ; up by 8 bits. The expected output keeps two byte loads, a shift of p[0] by
        ; 16 and an orr that shifts p[1] by 8.
    694 define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) {
    695 ; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
    696 ; CHECK: ldrb  r1, [r0]
    697 ; CHECK-NEXT: ldrb  r0, [r0, #1]
    698 ; CHECK-NEXT: lsl r1, r1, #16
    699 ; CHECK-NEXT: orr r0, r1, r0, lsl #8
    700 ; CHECK-NEXT: mov pc, lr
    701 ;
    702 ; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap_shl_8:
    703 ; CHECK-ARMv6: ldrb  r1, [r0]
    704 ; CHECK-ARMv6-NEXT: ldrb  r0, [r0, #1]
    705 ; CHECK-ARMv6-NEXT: lsl r1, r1, #16
    706 ; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #8
    707 ; CHECK-ARMv6-NEXT: bx  lr
    708 
    709   %tmp = bitcast i32* %arg to i8*
    710   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
    711   %tmp2 = load i8, i8* %tmp1, align 1
    712   %tmp3 = zext i8 %tmp2 to i32
    713   %tmp30 = shl nuw nsw i32 %tmp3, 8
    714   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
    715   %tmp5 = load i8, i8* %tmp4, align 2
    716   %tmp6 = zext i8 %tmp5 to i32
    717   %tmp7 = shl nuw nsw i32 %tmp6, 16
    718   %tmp8 = or i32 %tmp7, %tmp30
    719   ret i32 %tmp8
    720 }
    721 
    722 ; i8* p; // p is 2 byte aligned
    723 ; ((i32) p[1] << 16) | ((i32) p[0] << 24)
        ; Same byte order as zext_load_i32_by_i8_bswap, with the whole value shifted
        ; up by 16 bits so the pair occupies the upper half of the i32. The expected
        ; output keeps two byte loads, a shift of p[0] by 24 and an orr that shifts
        ; p[1] by 16.
    724 define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) {
    725 ; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
    726 ; CHECK: ldrb  r1, [r0]
    727 ; CHECK-NEXT: ldrb  r0, [r0, #1]
    728 ; CHECK-NEXT: lsl r1, r1, #24
    729 ; CHECK-NEXT: orr r0, r1, r0, lsl #16
    730 ; CHECK-NEXT: mov pc, lr
    731 ;
    732 ; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap_shl_16:
    733 ; CHECK-ARMv6: ldrb  r1, [r0]
    734 ; CHECK-ARMv6-NEXT: ldrb  r0, [r0, #1]
    735 ; CHECK-ARMv6-NEXT: lsl r1, r1, #24
    736 ; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #16
    737 ; CHECK-ARMv6-NEXT: bx  lr
    738 
    739   %tmp = bitcast i32* %arg to i8*
    740   %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
    741   %tmp2 = load i8, i8* %tmp1, align 1
    742   %tmp3 = zext i8 %tmp2 to i32
    743   %tmp30 = shl nuw nsw i32 %tmp3, 16
    744   %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
    745   %tmp5 = load i8, i8* %tmp4, align 2
    746   %tmp6 = zext i8 %tmp5 to i32
    747   %tmp7 = shl nuw nsw i32 %tmp6, 24
    748   %tmp8 = or i32 %tmp7, %tmp30
    749   ret i32 %tmp8
    750 }
    751 
    752 ; i8* p;
    753 ; i16* p1.i16 = (i16*) p;
    754 ; (p1.i16[0] << 8) | ((i16) p[2])
    755 ;
    756 ; This is essentially an i16 load from p[1], but we don't fold the pattern now
    757 ; because in the original DAG we don't have p[1] address available
        ; Accordingly, both run configurations expect the half-word load and the
        ; byte load to stay separate and be merged with a shifted orr.
    758 define i16 @load_i16_from_nonzero_offset(i8* %p) {
    759 ; CHECK-LABEL: load_i16_from_nonzero_offset:
    760 ; CHECK: ldrh  r1, [r0]
    761 ; CHECK-NEXT: ldrb  r0, [r0, #2]
    762 ; CHECK-NEXT: orr r0, r0, r1, lsl #8
    763 ; CHECK-NEXT: mov pc, lr
    764 ;
    765 ; CHECK-ARMv6-LABEL: load_i16_from_nonzero_offset:
    766 ; CHECK-ARMv6: ldrh  r1, [r0]
    767 ; CHECK-ARMv6-NEXT: ldrb  r0, [r0, #2]
    768 ; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #8
    769 ; CHECK-ARMv6-NEXT: bx  lr
    770 
    771   %p1.i16 = bitcast i8* %p to i16*
    772   %p2.i8 = getelementptr i8, i8* %p, i64 2
          ; Neither load specifies an explicit alignment, unlike the tests above.
    773   %v1 = load i16, i16* %p1.i16
    774   %v2.i8 = load i8, i8* %p2.i8
    775   %v2 = zext i8 %v2.i8 to i16
          ; The i16 shift by 8 discards the first byte's contribution, leaving
          ; (p[1] << 8) | p[2] after the or.
    776   %v1.shl = shl i16 %v1, 8
    777   %res = or i16 %v1.shl, %v2
    778   ret i16 %res
    779 }
    780