; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver1 | FileCheck %s
;
; Verify that, for architectures known to have poor-latency double precision
; shift instructions (this test runs with -mcpu=bdver1), we generate an
; alternative sequence of lower-latency instructions (shift/shift/lea)
; instead of a shrd instruction.
;
; Every function below computes (a >> N) | (b << (64 - N)) for a constant N.
; Each group of checks is anchored with CHECK-LABEL on the function's symbol
; so that a match can never leak into the output of a neighbouring function.

; uint64_t rshift1(uint64_t a, uint64_t b)
; {
;     return (a >> 1) | (b << 63);
; }
;
; NOTE(review): the shrq pattern deliberately matches any operand list,
; presumably because a shift by one may be printed without an explicit
; immediate -- confirm against the llc output before tightening it.

; CHECK-LABEL: rshift1:
; CHECK:       shrq {{.*}}
; CHECK-NEXT:  shlq $63, {{.*}}
; CHECK-NEXT:  leaq ({{.*}},{{.*}}), {{.*}}

define i64 @rshift1(i64 %a, i64 %b) nounwind readnone uwtable {
  %1 = lshr i64 %a, 1
  %2 = shl i64 %b, 63
  %3 = or i64 %2, %1
  ret i64 %3
}

; uint64_t rshift2(uint64_t a, uint64_t b)
; {
;     return (a >> 2) | (b << 62);
; }

; CHECK-LABEL: rshift2:
; CHECK:       shrq $2, {{.*}}
; CHECK-NEXT:  shlq $62, {{.*}}
; CHECK-NEXT:  leaq ({{.*}},{{.*}}), {{.*}}

define i64 @rshift2(i64 %a, i64 %b) nounwind readnone uwtable {
  %1 = lshr i64 %a, 2
  %2 = shl i64 %b, 62
  %3 = or i64 %2, %1
  ret i64 %3
}

; uint64_t rshift7(uint64_t a, uint64_t b)
; {
;     return (a >> 7) | (b << 57);
; }

; CHECK-LABEL: rshift7:
; CHECK:       shrq $7, {{.*}}
; CHECK-NEXT:  shlq $57, {{.*}}
; CHECK-NEXT:  leaq ({{.*}},{{.*}}), {{.*}}

define i64 @rshift7(i64 %a, i64 %b) nounwind readnone uwtable {
  %1 = lshr i64 %a, 7
  %2 = shl i64 %b, 57
  %3 = or i64 %2, %1
  ret i64 %3
}

; uint64_t rshift63(uint64_t a, uint64_t b)
; {
;     return (a >> 63) | (b << 1);
; }
;
; Here the expected output has no separate shlq: the checks require the
; (b << 1) term to appear as the scale factor 2 on %rsi in the leaq.

; CHECK-LABEL: rshift63:
; CHECK:       shrq $63, %rdi
; CHECK-NEXT:  leaq (%rdi,%rsi,2), %rax

define i64 @rshift63(i64 %a, i64 %b) nounwind readnone uwtable {
  %1 = lshr i64 %a, 63
  %2 = shl i64 %b, 1
  %3 = or i64 %2, %1
  ret i64 %3
}