; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s

; The fundamental problem: an add separated from other arithmetic by a sext can't
; be combined with the later instructions. However, if the first add is 'nsw',
; then we can promote the sext ahead of that add to allow optimizations.
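; A rough sketch of the rewrite (not an extra test case), using the IR from
; @add_nsw_consts below:
;   %add = add nsw i32 %i, 5
;   %ext = sext i32 %add to i64
;   %idx = add i64 %ext, 7
; becomes, once the sext is hoisted ahead of the nsw add:
;   %ext = sext i32 %i to i64
;   %idx = add i64 %ext, 12    ; the constants 5 and 7 fold into one add
; This is sound because 'nsw' guarantees sext(%i + 5) == sext(%i) + 5.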
define i64 @add_nsw_consts(i32 %i) {
; CHECK-LABEL: add_nsw_consts:
; CHECK:       # BB#0:
; CHECK-NEXT:    movslq %edi, %rax
; CHECK-NEXT:    addq $12, %rax
; CHECK-NEXT:    retq

  %add = add nsw i32 %i, 5
  %ext = sext i32 %add to i64
  %idx = add i64 %ext, 7
  ret i64 %idx
}

; An x86 bonus: If we promote the sext ahead of the 'add nsw',
; we allow LEA formation and eliminate an add instruction.
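; Sketch of the win (not a test case): after promotion the computation is
; x + sext(i) + 5, and all three terms fit into one LEA addressing mode:
;   leaq 5(%rsi,%rax), %rax    ; rax = rsi + rax + 5 (base + index + disp)
; Without promotion, the +5 happens in 32 bits before the sext, so a separate
; add instruction would be needed to combine the result with %x.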
define i64 @add_nsw_sext_add(i32 %i, i64 %x) {
; CHECK-LABEL: add_nsw_sext_add:
; CHECK:       # BB#0:
; CHECK-NEXT:    movslq %edi, %rax
; CHECK-NEXT:    leaq 5(%rsi,%rax), %rax
; CHECK-NEXT:    retq

  %add = add nsw i32 %i, 5
  %ext = sext i32 %add to i64
  %idx = add i64 %x, %ext
  ret i64 %idx
}

; Throw in a scale (left shift) because an LEA can do that too.
; Use a negative constant (LEA displacement) to verify that's handled correctly.
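; Worked example for the expected LEA below: the address math is
;   x + 8*(i - 5) = x + 8*i - 40
; so the shift by 3 becomes the scale 8 and the constant becomes the
; displacement -40:
;   leaq -40(%rsi,%rax,8), %rax    ; rax = rsi + rax*8 - 40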
define i64 @add_nsw_sext_lsh_add(i32 %i, i64 %x) {
; CHECK-LABEL: add_nsw_sext_lsh_add:
; CHECK:       # BB#0:
; CHECK-NEXT:    movslq %edi, %rax
; CHECK-NEXT:    leaq -40(%rsi,%rax,8), %rax
; CHECK-NEXT:    retq

  %add = add nsw i32 %i, -5
  %ext = sext i32 %add to i64
  %shl = shl i64 %ext, 3
  %idx = add i64 %x, %shl
  ret i64 %idx
}

; Don't promote the sext when its result feeds no further arithmetic (here it
; is only returned): there is nothing to combine with, and the wider 64-bit
; add instruction would need an extra byte to encode.
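; For reference (byte counts per the x86-64 encoding rules): the 32-bit form
;   addl $5, %edi    ; 83 C7 05       (3 bytes)
; is one byte shorter than the promoted 64-bit form would be:
;   addq $5, %rax    ; 48 83 C0 05    (4 bytes, REX.W prefix)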
define i64 @add_nsw_sext(i32 %i, i64 %x) {
; CHECK-LABEL: add_nsw_sext:
; CHECK:       # BB#0:
; CHECK-NEXT:    addl $5, %edi
; CHECK-NEXT:    movslq %edi, %rax
; CHECK-NEXT:    retq

  %add = add nsw i32 %i, 5
  %ext = sext i32 %add to i64
  ret i64 %ext
}

; The typical use case: a 64-bit system where an 'int' is used as an index into an array.
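; For instance, the IR in @gep8 below is roughly what Clang emits for the
; (hypothetical) C expression '&x[i + 5]' where 'i' is an 'int' and 'x' is a
; 'char *' on an LP64 target. In the wider-element tests that follow, the
; element size shows up as the LEA scale and the constant becomes a scaled
; displacement (e.g., -5 * sizeof(i16) = -10 in @gep16).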
define i8* @gep8(i32 %i, i8* %x) {
; CHECK-LABEL: gep8:
; CHECK:       # BB#0:
; CHECK-NEXT:    movslq %edi, %rax
; CHECK-NEXT:    leaq 5(%rsi,%rax), %rax
; CHECK-NEXT:    retq

  %add = add nsw i32 %i, 5
  %ext = sext i32 %add to i64
  %idx = getelementptr i8, i8* %x, i64 %ext
  ret i8* %idx
}

define i16* @gep16(i32 %i, i16* %x) {
; CHECK-LABEL: gep16:
; CHECK:       # BB#0:
; CHECK-NEXT:    movslq %edi, %rax
; CHECK-NEXT:    leaq -10(%rsi,%rax,2), %rax
; CHECK-NEXT:    retq

  %add = add nsw i32 %i, -5
  %ext = sext i32 %add to i64
  %idx = getelementptr i16, i16* %x, i64 %ext
  ret i16* %idx
}

define i32* @gep32(i32 %i, i32* %x) {
; CHECK-LABEL: gep32:
; CHECK:       # BB#0:
; CHECK-NEXT:    movslq %edi, %rax
; CHECK-NEXT:    leaq 20(%rsi,%rax,4), %rax
; CHECK-NEXT:    retq

  %add = add nsw i32 %i, 5
  %ext = sext i32 %add to i64
  %idx = getelementptr i32, i32* %x, i64 %ext
  ret i32* %idx
}

define i64* @gep64(i32 %i, i64* %x) {
; CHECK-LABEL: gep64:
; CHECK:       # BB#0:
; CHECK-NEXT:    movslq %edi, %rax
; CHECK-NEXT:    leaq -40(%rsi,%rax,8), %rax
; CHECK-NEXT:    retq

  %add = add nsw i32 %i, -5
  %ext = sext i32 %add to i64
  %idx = getelementptr i64, i64* %x, i64 %ext
  ret i64* %idx
}

; LEA can't scale by 16, but the adds can still be combined into an LEA.
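; Sketch of the expected lowering: an LEA scale must be 1, 2, 4, or 8, so the
; *16 for an i128 element needs its own shift, while the add of %x and the
; constant 5 * 16 = 80 still fold into the LEA:
;   shlq $4, %rax              ; rax = sext(i) * 16
;   leaq 80(%rsi,%rax), %rax   ; rax = rsi + rax + 80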
define i128* @gep128(i32 %i, i128* %x) {
; CHECK-LABEL: gep128:
; CHECK:       # BB#0:
; CHECK-NEXT:    movslq %edi, %rax
; CHECK-NEXT:    shlq $4, %rax
; CHECK-NEXT:    leaq 80(%rsi,%rax), %rax
; CHECK-NEXT:    retq

  %add = add nsw i32 %i, 5
  %ext = sext i32 %add to i64
  %idx = getelementptr i128, i128* %x, i64 %ext
  ret i128* %idx
}

; A bigger win can be achieved when there is more than one use of the
; sign-extended value. In this case, we can eliminate several sign-extension
; instructions and use more efficient addressing modes for the memory ops.
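; Sketch of the expected lowering for the function below: a single movslq of
; %i serves all three memory operands, because the nsw adds of 1 and 2 become
; scaled displacements (1*4 = 4 and 2*4 = 8) in the addressing modes:
;   movl 4(%rdi,%rax,4), %ecx    ; load a[i + 1]
;   addl 8(%rdi,%rax,4), %ecx    ; add  a[i + 2]
;   movl %ecx, (%rdi,%rax,4)     ; store to a[i]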
define void @PR20134(i32* %a, i32 %i) {
; CHECK-LABEL: PR20134:
; CHECK:       # BB#0:
; CHECK-NEXT:    movslq %esi, %rax
; CHECK-NEXT:    movl 4(%rdi,%rax,4), %ecx
; CHECK-NEXT:    addl 8(%rdi,%rax,4), %ecx
; CHECK-NEXT:    movl %ecx, (%rdi,%rax,4)
; CHECK-NEXT:    retq

  %add1 = add nsw i32 %i, 1
  %idx1 = sext i32 %add1 to i64
  %gep1 = getelementptr i32, i32* %a, i64 %idx1
  %load1 = load i32, i32* %gep1, align 4

  %add2 = add nsw i32 %i, 2
  %idx2 = sext i32 %add2 to i64
  %gep2 = getelementptr i32, i32* %a, i64 %idx2
  %load2 = load i32, i32* %gep2, align 4

  %add3 = add i32 %load1, %load2
  %idx3 = sext i32 %i to i64
  %gep3 = getelementptr i32, i32* %a, i64 %idx3
  store i32 %add3, i32* %gep3, align 4
  ret void
}