Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -march=x86 | FileCheck %s
      2 
      3 define i32 @t1(i8* %X, i32 %i) {
      4 ; CHECK: t1:
      5 ; CHECK-NOT: and
      6 ; CHECK: movzbl
      7 ; CHECK: movl (%{{...}},%{{...}},4),
      8 ; CHECK: ret
      9 ; 1020 == 255 << 2, so the offset is (%i & 255) * 4: a byte zext index at scale 4.
     10 entry:
     11   %shifted = shl i32 %i, 2
     12   %offset = and i32 %shifted, 1020
     13   %addr.i8 = getelementptr i8* %X, i32 %offset
     14   %addr = bitcast i8* %addr.i8 to i32*
     15   %word = load i32* %addr
     16   ret i32 %word
     17 }
     18 
     19 define i32 @t2(i16* %X, i32 %i) {
     20 ; CHECK: t2:
     21 ; CHECK-NOT: and
     22 ; CHECK: movzwl
     23 ; CHECK: movl (%{{...}},%{{...}},4),
     24 ; CHECK: ret
     25 ; 131070 == 65535 << 1 and the i16 gep scales by 2: the offset is (%i & 65535) * 4.
     26 entry:
     27   %shifted = shl i32 %i, 1
     28   %index = and i32 %shifted, 131070
     29   %addr.i16 = getelementptr i16* %X, i32 %index
     30   %addr = bitcast i16* %addr.i16 to i32*
     31   %word = load i32* %addr
     32   ret i32 %word
     33 }
     34 
     35 define i32 @t3(i16* %i.ptr, i32* %arr) {
     36 ; Tricky case: an lshr feeding a gep becomes an lshr followed by an 'and' that
     37 ; clears the low bits. The simpler form shifts right by a larger constant and
     38 ; lets the addressing mode scale the result back up. What makes this harder is
     39 ; the two-phase zext of %half and the reuse of the values in the function: the
     40 ; DAG can get confusing trying to re-use both of them, which prevents easy
     41 ; analysis of the mask needed to match this pattern.
     42 ; CHECK: t3:
     43 ; CHECK-NOT: and
     44 ; CHECK: shrl
     45 ; CHECK: addl (%{{...}},%{{...}},4),
     46 ; CHECK: ret
     47 ; Note that %wide feeds both the shifted index and the final add.
     48 entry:
     49   %half = load i16* %i.ptr
     50   %wide = zext i16 %half to i32
     51   %slot = lshr i32 %wide, 11
     52   %elt.ptr = getelementptr inbounds i32* %arr, i32 %slot
     53   %elt = load i32* %elt.ptr
     54   %total = add i32 %elt, %wide
     55   ret i32 %total
     56 }
     57 
     58 define i32 @t4(i16* %i.ptr, i32* %arr) {
     59 ; A variant of @t3 with more zero extends and more re-use of intermediate
     60 ; values. This exercises slightly different bits of canonicalization.
     61 ; CHECK: t4:
     62 ; CHECK-NOT: and
     63 ; CHECK: shrl
     64 ; CHECK: addl (%{{...}},%{{...}},4),
     65 ; CHECK: ret
     66 ; %wide and %slot are each used twice: once for addressing, once in the sum.
     67 entry:
     68   %half = load i16* %i.ptr
     69   %wide = zext i16 %half to i32
     70   %slot = lshr i32 %wide, 11
     71   %slot.wide = zext i32 %slot to i64
     72   %elt.ptr = getelementptr inbounds i32* %arr, i64 %slot.wide
     73   %elt = load i32* %elt.ptr
     74   %partial = add i32 %elt, %wide
     75   %total = add i32 %partial, %slot
     76   ret i32 %total
     77 }
     78