; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s

; Background: when a sext sits between an add and the arithmetic that follows
; it, the add cannot be merged with those later instructions. If that first add
; carries 'nsw', the sext may be hoisted above it, which exposes the combine.

; The two constants (5 and 7) end up folded into one 64-bit add of 12.
define i64 @add_nsw_consts(i32 %i) {
; CHECK-LABEL: add_nsw_consts:
; CHECK:       # BB#0:
; CHECK-NEXT:    movslq %edi, %rax
; CHECK-NEXT:    addq $12, %rax
; CHECK-NEXT:    retq

  %narrow = add nsw i32 %i, 5
  %wide = sext i32 %narrow to i64
  %sum = add i64 %wide, 7
  ret i64 %sum
}

; x86-specific benefit: with the sext moved ahead of the 'add nsw', the constant
; and the second add fit in a single LEA, so one add instruction disappears.

define i64 @add_nsw_sext_add(i32 %i, i64 %x) {
; CHECK-LABEL: add_nsw_sext_add:
; CHECK:       # BB#0:
; CHECK-NEXT:    movslq %edi, %rax
; CHECK-NEXT:    leaq 5(%rsi,%rax), %rax
; CHECK-NEXT:    retq

  %narrow = add nsw i32 %i, 5
  %wide = sext i32 %narrow to i64
  %sum = add i64 %x, %wide
  ret i64 %sum
}

; Same idea with a left shift added, since LEA can also apply a scale.
; The constant is negative to check that the LEA displacement comes out right
; (-5 shifted left by 3 gives the -40 below).

define i64 @add_nsw_sext_lsh_add(i32 %i, i64 %x) {
; CHECK-LABEL: add_nsw_sext_lsh_add:
; CHECK:       # BB#0:
; CHECK-NEXT:    movslq %edi, %rax
; CHECK-NEXT:    leaq -40(%rsi,%rax,8), %rax
; CHECK-NEXT:    retq

  %narrow = add nsw i32 %i, -5
  %wide = sext i32 %narrow to i64
  %scaled = shl i64 %wide, 3
  %sum = add i64 %x, %scaled
  ret i64 %sum
}

; Negative test: leave the sext after the add when the extended value feeds no
; further arithmetic (in the function below it is only returned). The wider add
; instruction needs an extra byte to encode.
define i64 @add_nsw_sext(i32 %i, i64 %x) {
; CHECK-LABEL: add_nsw_sext:
; CHECK:       # BB#0:
; CHECK-NEXT:    addl $5, %edi
; CHECK-NEXT:    movslq %edi, %rax
; CHECK-NEXT:    retq

  %narrow = add nsw i32 %i, 5
  %wide = sext i32 %narrow to i64
  ret i64 %wide
}

; The common real-world shape: on a 64-bit target, an 'int' indexes an array.
; Each gepN test below expects the constant offset, multiplied by the element
; size, to show up as the LEA displacement.

define i8* @gep8(i32 %i, i8* %x) {
; CHECK-LABEL: gep8:
; CHECK:       # BB#0:
; CHECK-NEXT:    movslq %edi, %rax
; CHECK-NEXT:    leaq 5(%rsi,%rax), %rax
; CHECK-NEXT:    retq

  %narrow = add nsw i32 %i, 5
  %wide = sext i32 %narrow to i64
  %elt = getelementptr i8, i8* %x, i64 %wide
  ret i8* %elt
}

define i16* @gep16(i32 %i, i16* %x) {
; CHECK-LABEL: gep16:
; CHECK:       # BB#0:
; CHECK-NEXT:    movslq %edi, %rax
; CHECK-NEXT:    leaq -10(%rsi,%rax,2), %rax
; CHECK-NEXT:    retq

  %narrow = add nsw i32 %i, -5
  %wide = sext i32 %narrow to i64
  %elt = getelementptr i16, i16* %x, i64 %wide
  ret i16* %elt
}

define i32* @gep32(i32 %i, i32* %x) {
; CHECK-LABEL: gep32:
; CHECK:       # BB#0:
; CHECK-NEXT:    movslq %edi, %rax
; CHECK-NEXT:    leaq 20(%rsi,%rax,4), %rax
; CHECK-NEXT:    retq

  %narrow = add nsw i32 %i, 5
  %wide = sext i32 %narrow to i64
  %elt = getelementptr i32, i32* %x, i64 %wide
  ret i32* %elt
}

define i64* @gep64(i32 %i, i64* %x) {
; CHECK-LABEL: gep64:
; CHECK:       # BB#0:
; CHECK-NEXT:    movslq %edi, %rax
; CHECK-NEXT:    leaq -40(%rsi,%rax,8), %rax
; CHECK-NEXT:    retq

  %narrow = add nsw i32 %i, -5
  %wide = sext i32 %narrow to i64
  %elt = getelementptr i64, i64* %x, i64 %wide
  ret i64* %elt
}

; A scale of 16 is not available in an LEA, so a separate shift is expected;
; the additions can nevertheless still be merged into one LEA.
define i128* @gep128(i32 %i, i128* %x) {
; CHECK-LABEL: gep128:
; CHECK:       # BB#0:
; CHECK-NEXT:    movslq %edi, %rax
; CHECK-NEXT:    shlq $4, %rax
; CHECK-NEXT:    leaq 80(%rsi,%rax), %rax
; CHECK-NEXT:    retq

  %narrow = add nsw i32 %i, 5
  %wide = sext i32 %narrow to i64
  %elt = getelementptr i128, i128* %x, i64 %wide
  ret i128* %elt
}

; The payoff grows when the sign-extended value has several uses: the extra
; sign-extension instructions go away and the memory operations can use more
; efficient addressing modes. Below, a single movslq of %i is expected to serve
; all three accesses (a[i+1], a[i+2] and a[i]).

define void @PR20134(i32* %a, i32 %i) {
; CHECK-LABEL: PR20134:
; CHECK:       # BB#0:
; CHECK-NEXT:    movslq %esi, %rax
; CHECK-NEXT:    movl 4(%rdi,%rax,4), %ecx
; CHECK-NEXT:    addl 8(%rdi,%rax,4), %ecx
; CHECK-NEXT:    movl %ecx, (%rdi,%rax,4)
; CHECK-NEXT:    retq

  ; First operand: a[i + 1].
  %next1 = add nsw i32 %i, 1
  %off1 = sext i32 %next1 to i64
  %addr1 = getelementptr i32, i32* %a, i64 %off1
  %val1 = load i32, i32* %addr1, align 4

  ; Second operand: a[i + 2].
  %next2 = add nsw i32 %i, 2
  %off2 = sext i32 %next2 to i64
  %addr2 = getelementptr i32, i32* %a, i64 %off2
  %val2 = load i32, i32* %addr2, align 4

  ; Store the sum of the two loads to a[i].
  %total = add i32 %val1, %val2
  %off0 = sext i32 %i to i64
  %addr0 = getelementptr i32, i32* %a, i64 %off0
  store i32 %total, i32* %addr0, align 4
  ret void
}