; RUN: llc < %s -march=x86-64 -mcpu=bdver1 | FileCheck %s

; Verify that on architectures known to have high-latency double-precision
; shift instructions, a lower-latency alternative instruction sequence is
; generated instead of the shld instruction.
;
; Every function below is anchored with a label directive so that the
; instruction patterns written for one function cannot be satisfied by
; output that belongs to a different function.

; uint64_t lshift1(uint64_t a, uint64_t b)
; {
;   return (a << 1) | (b >> 63);
; }
;
; The left shift by one is expected to be emitted as an add, followed by the
; right shift and an lea that combines the two halves.

; CHECK-LABEL: lshift1:
; CHECK: addq {{.*}},{{.*}}
; CHECK-NEXT: shrq $63, {{.*}}
; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}

define i64 @lshift1(i64 %a, i64 %b) nounwind readnone uwtable {
entry:
  %shl = shl i64 %a, 1
  %shr = lshr i64 %b, 63
  %or = or i64 %shr, %shl
  ret i64 %or
}

; uint64_t lshift2(uint64_t a, uint64_t b)
; {
;   return (a << 2) | (b >> 62);
; }
;
; Expected sequence: shift left, shift right, then an lea combining both.

; CHECK-LABEL: lshift2:
; CHECK: shlq $2, {{.*}}
; CHECK-NEXT: shrq $62, {{.*}}
; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}

define i64 @lshift2(i64 %a, i64 %b) nounwind readnone uwtable {
entry:
  %shl = shl i64 %a, 2
  %shr = lshr i64 %b, 62
  %or = or i64 %shr, %shl
  ret i64 %or
}

; uint64_t lshift7(uint64_t a, uint64_t b)
; {
;   return (a << 7) | (b >> 57);
; }
;
; Expected sequence: shift left, shift right, then an lea combining both.

; CHECK-LABEL: lshift7:
; CHECK: shlq $7, {{.*}}
; CHECK-NEXT: shrq $57, {{.*}}
; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}

define i64 @lshift7(i64 %a, i64 %b) nounwind readnone uwtable {
entry:
  %shl = shl i64 %a, 7
  %shr = lshr i64 %b, 57
  %or = or i64 %shr, %shl
  ret i64 %or
}

; uint64_t lshift63(uint64_t a, uint64_t b)
; {
;   return (a << 63) | (b >> 1);
; }
;
; The right shift by one is matched without an immediate operand pattern
; (presumably because a shift by one may be printed without an explicit
; count -- confirm against the assembly printer).

; CHECK-LABEL: lshift63:
; CHECK: shlq $63, {{.*}}
; CHECK-NEXT: shrq {{.*}}
; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}

define i64 @lshift63(i64 %a, i64 %b) nounwind readnone uwtable {
entry:
  %shl = shl i64 %a, 63
  %shr = lshr i64 %b, 1
  %or = or i64 %shr, %shl
  ret i64 %or
}